author | duke |
Wed, 05 Jul 2017 20:10:48 +0200 | |
changeset 27963 | 88d7c7c376e9 |
parent 22505 | 4523090c9674 |
child 29325 | 0e86e64c66e5 |
permissions | -rw-r--r-- |
1 | 1 |
/* |
18507
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2 |
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. |
1 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
7 |
* published by the Free Software Foundation. |
|
8 |
* |
|
9 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
13 |
* accompanied this code). |
|
14 |
* |
|
15 |
* You should have received a copy of the GNU General Public License version |
|
16 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
17 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 |
* |
|
5547
f4b087cbb361
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
5419
diff
changeset
|
19 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
f4b087cbb361
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
5419
diff
changeset
|
20 |
* or visit www.oracle.com if you need additional information or have any |
f4b087cbb361
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
5419
diff
changeset
|
21 |
* questions. |
1 | 22 |
* |
23 |
*/ |
|
24 |
||
7397 | 25 |
#include "precompiled.hpp" |
14626
0cf4eccf130f
8003240: x86: move MacroAssembler into separate file
twisti
parents:
14132
diff
changeset
|
26 |
#include "asm/macroAssembler.hpp" |
0cf4eccf130f
8003240: x86: move MacroAssembler into separate file
twisti
parents:
14132
diff
changeset
|
27 |
#include "asm/macroAssembler.inline.hpp" |
7397 | 28 |
#include "interpreter/interpreter.hpp" |
29 |
#include "nativeInst_x86.hpp" |
|
30 |
#include "oops/instanceOop.hpp" |
|
13728
882756847a04
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
13391
diff
changeset
|
31 |
#include "oops/method.hpp" |
7397 | 32 |
#include "oops/objArrayKlass.hpp" |
33 |
#include "oops/oop.inline.hpp" |
|
34 |
#include "prims/methodHandles.hpp" |
|
35 |
#include "runtime/frame.inline.hpp" |
|
36 |
#include "runtime/handles.inline.hpp" |
|
37 |
#include "runtime/sharedRuntime.hpp" |
|
38 |
#include "runtime/stubCodeGenerator.hpp" |
|
39 |
#include "runtime/stubRoutines.hpp" |
|
14583
d70ee55535f4
8003935: Simplify the needed includes for using Thread::current()
stefank
parents:
14132
diff
changeset
|
40 |
#include "runtime/thread.inline.hpp" |
7397 | 41 |
#include "utilities/top.hpp" |
42 |
#ifdef COMPILER2 |
|
43 |
#include "opto/runtime.hpp" |
|
44 |
#endif |
|
1 | 45 |
|
46 |
// Declaration and definition of StubGenerator (no .hpp file). |
|
47 |
// For a more detailed description of the stub routine structure |
|
48 |
// see the comment in stubRoutines.hpp |
|
49 |
||
50 |
// Shorthand for emitting an instruction through the current _masm.
#define __ _masm->
// Same as above, but with _masm cast to Assembler* first.
#define a__ ((Assembler*)_masm)->

// BLOCK_COMMENT expands to nothing in PRODUCT builds; in all other builds it
// forwards the string to block_comment() on the current assembler.
#ifndef PRODUCT
#define BLOCK_COMMENT(str) __ block_comment(str)
#else
#define BLOCK_COMMENT(str) /* nothing */
#endif

// Binds a label and then emits "<label>:" as a block comment. The label
// identifier is stringified, so renaming a label changes the emitted comment.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

const int MXCSR_MASK = 0xFFC0;          // Mask out any pending exceptions
const int FPU_CNTRL_WRD_MASK = 0xFFFF;  // Applied to the saved FPU control word before comparing
|
63 |
||
64 |
// ------------------------------------------------------------------------------------------------------------------------- |
|
65 |
// Stub Code definitions |
|
66 |
||
67 |
// Runtime helper called from the handler_for_unsafe_access stub.
//
// Takes the saved exception pc of the current JavaThread, computes the address
// of the instruction that follows it, flags a pending unsafe-access error on
// the thread, and returns that follow-on address.
static address handle_unsafe_access() {
  JavaThread* const current = JavaThread::current();

  // The instruction at the saved pc is the one to "emulate". Treating it as a
  // no-op is acceptable (the load simply yields garbage), so all that is
  // needed is the address of the next instruction.
  const address faulting_pc = current->saved_exception_pc();
  const address resume_pc   = Assembler::locate_next_instruction(faulting_pc);

  // Request an async exception on this thread.
  current->set_pending_unsafe_access_error();

  // Execution continues at the instruction after the faulting one.
  return resume_pc;
}
|
81 |
||
82 |
class StubGenerator: public StubCodeGenerator { |
|
83 |
private: |
|
84 |
||
85 |
#ifdef PRODUCT |
|
18073
f02460441ddc
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
17622
diff
changeset
|
86 |
#define inc_counter_np(counter) ((void)0) |
1 | 87 |
#else |
88 |
void inc_counter_np_(int& counter) { |
|
1066 | 89 |
__ incrementl(ExternalAddress((address)&counter)); |
1 | 90 |
} |
91 |
#define inc_counter_np(counter) \ |
|
92 |
BLOCK_COMMENT("inc_counter " #counter); \ |
|
93 |
inc_counter_np_(counter); |
|
94 |
#endif //PRODUCT |
|
95 |
||
96 |
// Emits code that bumps the SharedRuntime array-copy counter matching the
// element type t. Does nothing in PRODUCT builds.
//
// t - element type; only T_BYTE, T_SHORT, T_INT, T_LONG and T_OBJECT are
//     handled, any other value hits ShouldNotReachHere().
void inc_copy_counter_np(BasicType t) {
#ifndef PRODUCT
  switch (t) {
    case T_BYTE:
      inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr);
      break;
    case T_SHORT:
      inc_counter_np(SharedRuntime::_jshort_array_copy_ctr);
      break;
    case T_INT:
      inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
      break;
    case T_LONG:
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
      break;
    case T_OBJECT:
      inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
      break;
    default:
      ShouldNotReachHere();
  }
#endif // !PRODUCT
}
|
108 |
||
109 |
//------------------------------------------------------------------------------------------------------------------------ |
|
110 |
// Call stubs are used to call Java from C |
|
111 |
// |
|
112 |
// [ return_from_Java ] <--- rsp |
|
113 |
// [ argument word n ] |
|
114 |
// ... |
|
115 |
// -N [ argument word 1 ] |
|
116 |
// -7 [ Possible padding for stack alignment ] |
|
117 |
// -6 [ Possible padding for stack alignment ] |
|
118 |
// -5 [ Possible padding for stack alignment ] |
|
119 |
// -4 [ mxcsr save ] <--- rsp_after_call |
|
120 |
// -3 [ saved rbx ] |
|
121 |
// -2 [ saved rsi ] |
|
122 |
// -1 [ saved rdi ] |
|
123 |
// 0 [ saved rbp ] <--- rbp |
|
124 |
// 1 [ return address ] |
|
125 |
// 2 [ ptr. to call wrapper ] |
|
126 |
// 3 [ result ] |
|
127 |
// 4 [ result_type ] |
|
128 |
// 5 [ method ] |
|
129 |
// 6 [ entry_point ] |
|
130 |
// 7 [ parameters ] |
|
131 |
// 8 [ parameter_size ] |
|
132 |
// 9 [ thread ] |
|
133 |
||
134 |
||
135 |
// Generates the call stub used to call Java from C.
//
// return_address - out parameter; set to the pc immediately following the
//                  emitted call into Java.
// Returns the address of the first instruction of the stub.
//
// All frame slots below are addressed relative to rbp after enter(): negative
// offsets are the register save area set up here, positive offsets are the
// incoming arguments of the stub.
address generate_call_stub(address& return_address) {
  StubCodeMark mark(this, "StubRoutines", "call_stub");
  address stub_start = __ pc();

  assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code");

  // %mxcsr is saved and restored only when SSE is in use.
  const bool save_mxcsr = UseSSE > 0;
  // Bytes reserved below rbp for mxcsr, rbx, rsi and rdi.
  const int  reg_save_area_bytes = 4 * wordSize;

  // Save area (rsp_after_call and mxcsr_save are the same slot).
  const Address rsp_after_call(rbp, -4 * wordSize);  // same as in generate_catch_exception()!
  const Address mxcsr_save    (rbp, -4 * wordSize);
  const Address saved_rbx     (rbp, -3 * wordSize);
  const Address saved_rsi     (rbp, -2 * wordSize);
  const Address saved_rdi     (rbp, -1 * wordSize);
  // Incoming arguments.
  const Address result        (rbp,  3 * wordSize);
  const Address result_type   (rbp,  4 * wordSize);
  const Address method        (rbp,  5 * wordSize);
  const Address entry_point   (rbp,  6 * wordSize);
  const Address parameters    (rbp,  7 * wordSize);
  const Address parameter_size(rbp,  8 * wordSize);
  const Address thread        (rbp,  9 * wordSize);  // same as in generate_catch_exception()!

  // ---- prologue: build the frame and reserve stack space ----
  __ enter();
  __ movptr(rcx, parameter_size);                    // parameter counter
  __ shlptr(rcx, Interpreter::logStackElementSize);  // convert parameter count to bytes
  __ addptr(rcx, reg_save_area_bytes);               // reserve space for register saves
  __ subptr(rsp, rcx);
  __ andptr(rsp, -(StackAlignmentInBytes));          // align stack

  // Save rdi, rsi and rbx, according to C calling conventions.
  __ movptr(saved_rdi, rdi);
  __ movptr(saved_rsi, rsi);
  __ movptr(saved_rbx, rbx);

  // Save %mxcsr; reload the standard value only if the masked current value
  // differs from it.
  if (save_mxcsr) {
    Label mxcsr_ok;
    __ stmxcsr(mxcsr_save);
    __ movl(rax, mxcsr_save);
    __ andl(rax, MXCSR_MASK);  // only check control and mask bits
    ExternalAddress std_mxcsr(StubRoutines::addr_mxcsr_std());
    __ cmp32(rax, std_mxcsr);
    __ jcc(Assembler::equal, mxcsr_ok);
    __ ldmxcsr(std_mxcsr);
    __ bind(mxcsr_ok);
  }

  // Make sure the FPU control word is correct.
  __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));

#ifdef ASSERT
  // The stub must not be entered with a pending exception.
  {
    Label no_pending;
    __ movptr(rcx, thread);
    __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
    __ jcc(Assembler::equal, no_pending);
    __ stop("StubRoutines::call_stub: entered with pending exception");
    __ bind(no_pending);
  }
#endif

  // ---- pass parameters, if any ----
  BLOCK_COMMENT("pass parameters if any");
  Label parameters_done;
  __ movl(rcx, parameter_size);  // parameter counter
  __ testl(rcx, rcx);
  __ jcc(Assembler::zero, parameters_done);

  // Copy loop: Java parameters are copied in reverse order (receiver last),
  // which inverts the argument order in the process.
  //   source is rdx[rcx: N-1..0]
  //   dest   is rsp[rbx: 0..N-1]
  Label loop;
  __ movptr(rdx, parameters);  // parameter pointer
  __ xorptr(rbx, rbx);

  __ BIND(loop);
  // get parameter
  __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize));
  // store parameter
  __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
                    Interpreter::expr_offset_in_bytes(0)), rax);
  __ increment(rbx);
  __ decrement(rcx);
  __ jcc(Assembler::notZero, loop);

  // ---- call the Java function ----
  __ BIND(parameters_done);
  __ movptr(rbx, method);       // get Method*
  __ movptr(rax, entry_point);  // get entry_point
  __ mov(rsi, rsp);             // set sender sp
  BLOCK_COMMENT("call Java function");
  __ call(rax);

  BLOCK_COMMENT("call_stub_return_address:");
  return_address = __ pc();

#ifdef COMPILER2
  {
    Label L_skip;
    if (UseSSE >= 2) {
      __ verify_FPU(0, "call_stub_return");
    } else {
      // Free x87 registers 1..7 unconditionally; register 0 is freed below
      // only when it does not carry the result.
      for (int i = 1; i < 8; i++) {
        __ ffree(i);
      }

      // UseSSE <= 1 so double result should be left on TOS
      __ movl(rsi, result_type);
      __ cmpl(rsi, T_DOUBLE);
      __ jcc(Assembler::equal, L_skip);
      if (UseSSE == 0) {
        // UseSSE == 0 so float result should be left on TOS
        __ cmpl(rsi, T_FLOAT);
        __ jcc(Assembler::equal, L_skip);
      }
      __ ffree(0);
    }
    __ BIND(L_skip);
  }
#endif // COMPILER2

  // ---- store the result depending on its type ----
  // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
  __ movptr(rdi, result);
  Label is_long, is_float, is_double, exit;
  __ movl(rsi, result_type);
  __ cmpl(rsi, T_LONG);
  __ jcc(Assembler::equal, is_long);
  __ cmpl(rsi, T_FLOAT);
  __ jcc(Assembler::equal, is_float);
  __ cmpl(rsi, T_DOUBLE);
  __ jcc(Assembler::equal, is_double);

  // T_INT case falls through to here.
  __ movl(Address(rdi, 0), rax);
  __ BIND(exit);

  // check that FPU stack is empty
  __ verify_FPU(0, "generate_call_stub");

  // ---- epilogue ----
  // pop parameters
  __ lea(rsp, rsp_after_call);

  // restore %mxcsr
  if (save_mxcsr) {
    __ ldmxcsr(mxcsr_save);
  }

  // restore rdi, rsi and rbx, then release the save area
  __ movptr(rbx, saved_rbx);
  __ movptr(rsi, saved_rsi);
  __ movptr(rdi, saved_rdi);
  __ addptr(rsp, 4*wordSize);

  // return
  __ pop(rbp);
  __ ret(0);

  // ---- out-of-line handlers for return types different from T_INT ----
  __ BIND(is_long);
  __ movl(Address(rdi, 0 * wordSize), rax);
  __ movl(Address(rdi, 1 * wordSize), rdx);
  __ jmp(exit);

  __ BIND(is_float);
  // interpreter uses xmm0 for return values
  if (UseSSE >= 1) {
    __ movflt(Address(rdi, 0), xmm0);
  } else {
    __ fstp_s(Address(rdi, 0));
  }
  __ jmp(exit);

  __ BIND(is_double);
  // interpreter uses xmm0 for return values
  if (UseSSE >= 2) {
    __ movdbl(Address(rdi, 0), xmm0);
  } else {
    __ fstp_d(Address(rdi, 0));
  }
  __ jmp(exit);

  return stub_start;
}
|
323 |
||
324 |
||
325 |
//------------------------------------------------------------------------------------------------------------------------ |
|
326 |
// Return point for a Java call if there's an exception thrown in Java code. |
|
327 |
// The exception is caught and transformed into a pending exception stored in |
|
328 |
// JavaThread that can be tested from within the VM. |
|
329 |
// |
|
330 |
// Note: Usually the parameters are removed by the callee. In case of an exception |
|
331 |
// crossing an activation frame boundary, that is not the case if the callee |
|
332 |
// is compiled code => need to setup the rsp. |
|
333 |
// |
|
334 |
// rax: exception oop |
|
335 |
||
336 |
// Generates the catch_exception stub.
//
// On entry rax holds the exception oop. The stub stores it as the thread's
// pending exception, records __FILE__/__LINE__ of this source location as the
// exception file/line, and jumps to the call stub's return address.
// Returns the address of the first instruction of the stub.
address generate_catch_exception() {
  StubCodeMark mark(this, "StubRoutines", "catch_exception");
  // Frame slots; both must stay in sync with generate_call_stub().
  const Address rsp_after_call(rbp, -4 * wordSize);  // same as in generate_call_stub()!
  const Address thread        (rbp,  9 * wordSize);  // same as in generate_call_stub()!
  address stub_start = __ pc();

  // Fetch the thread directly from the frame slot.
  __ movptr(rcx, thread);
#ifdef ASSERT
  // The thread read from the frame must match the one from get_thread().
  {
    Label threads_match;
    __ get_thread(rbx);
    __ cmpptr(rbx, rcx);
    __ jcc(Assembler::equal, threads_match);
    __ stop("StubRoutines::catch_exception: threads must correspond");
    __ bind(threads_match);
  }
#endif

  // Set the pending exception together with its file and line.
  __ verify_oop(rax);
  __ movptr(Address(rcx, Thread::pending_exception_offset()), rax);
  __ lea(Address(rcx, Thread::exception_file_offset()),
         ExternalAddress((address)__FILE__));
  __ movl(Address(rcx, Thread::exception_line_offset()), __LINE__);

  // Complete the return to the VM through the call stub's return address.
  assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
  __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));

  return stub_start;
}
|
366 |
||
367 |
||
368 |
//------------------------------------------------------------------------------------------------------------------------ |
|
369 |
// Continuation point for runtime calls returning with a pending exception. |
|
370 |
// The pending exception check happened in the runtime or native call stub. |
|
371 |
// The pending exception in Thread is converted into a Java-level exception. |
|
372 |
// |
|
373 |
// Contract with Java-level exception handlers: |
|
5046 | 374 |
// rax: exception |
1 | 375 |
// rdx: throwing pc |
376 |
// |
|
377 |
// NOTE: At entry of this stub, exception-pc must be on stack !! |
|
378 |
||
379 |
// Generates the forward_exception stub.
//
// On entry the stack pointer addresses the return address into Java code;
// that return address is used as the throwing pc. The stub asks
// SharedRuntime::exception_handler_for_return_address for the handler, moves
// the thread's pending exception into rax (clearing the pending slot), pops
// the return address into rdx, and jumps to the handler.
// Returns the address of the first instruction of the stub.
address generate_forward_exception() {
  StubCodeMark mark(this, "StubRoutines", "forward exception");
  address stub_start = __ pc();

  // Register assignments used throughout this stub.
  const Register thread        = rcx;
  const Register exception_oop = rax;
  const Register handler_addr  = rbx;
  const Register exception_pc  = rdx;

  // The thread's pending-exception slot, addressed through the thread register.
  const Address pending_exception(thread, Thread::pending_exception_offset());

  // Arguments pushed before the runtime call are still on the stack, but the
  // exception handler resets the stack pointer, so they are ignored here. A
  // potential result in registers can be ignored as well.

#ifdef ASSERT
  // This code must only be executed if there is a pending exception.
  {
    Label has_pending;
    __ get_thread(thread);
    __ cmpptr(pending_exception, (int32_t)NULL_WORD);
    __ jcc(Assembler::notEqual, has_pending);
    __ stop("StubRoutines::forward exception: no pending exception (1)");
    __ bind(has_pending);
  }
#endif

  // Compute the exception handler into rbx.
  __ get_thread(thread);
  __ movptr(exception_pc, Address(rsp, 0));
  BLOCK_COMMENT("call exception_handler_for_return_address");
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc);
  __ mov(handler_addr, rax);

  // Set up rax and rdx, remove the return address and clear the pending
  // exception. The thread register is reloaded after the leaf call.
  __ get_thread(thread);
  __ pop(exception_pc);
  __ movptr(exception_oop, pending_exception);
  __ movptr(pending_exception, NULL_WORD);

#ifdef ASSERT
  // The exception just loaded must be non-null.
  {
    Label oop_set;
    __ testptr(exception_oop, exception_oop);
    __ jcc(Assembler::notEqual, oop_set);
    __ stop("StubRoutines::forward exception: no pending exception (2)");
    __ bind(oop_set);
  }
#endif

  // Verify that there is really a valid exception in rax.
  __ verify_oop(exception_oop);

  // Continue at the exception handler (return address already removed):
  //   rax: exception
  //   rbx: exception handler
  //   rdx: throwing pc
  __ jmp(handler_addr);

  return stub_start;
}
|
442 |
||
443 |
||
444 |
//---------------------------------------------------------------------------------------------------- |
|
445 |
// Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest) |
|
446 |
// |
|
447 |
// xchg exists as far back as 8086, lock needed for MP only |
|
448 |
// Stack layout immediately after call: |
|
449 |
// |
|
450 |
// 0 [ret addr ] <--- rsp |
|
451 |
// 1 [ ex ] |
|
452 |
// 2 [ dest ] |
|
453 |
// |
|
454 |
// Result: *dest <- ex, return (old *dest) |
|
455 |
// |
|
456 |
// Note: win32 does not currently use this code |
|
457 |
||
458 |
// Generates the atomic_xchg stub: stores the exchange value into *dest and
// leaves the previous contents of *dest in rax.
// Returns the address of the first instruction of the stub.
address generate_atomic_xchg() {
  StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
  address stub_start = __ pc();

  // Argument slots as seen AFTER rdx has been pushed below; the push moves
  // each argument one word further from rsp than at the point of the call.
  const Address exchange_arg(rsp, 2 * wordSize);
  const Address dest_arg    (rsp, 3 * wordSize);

  __ push(rdx);                    // rdx is used as scratch
  __ movl(rax, exchange_arg);      // rax = exchange value
  __ movptr(rdx, dest_arg);        // rdx = dest
  __ xchgl(rax, Address(rdx, 0));  // swap rax with *dest
  __ pop(rdx);
  __ ret(0);

  return stub_start;
}
|
473 |
||
474 |
//---------------------------------------------------------------------------------------------------- |
|
475 |
// Support for void verify_mxcsr() |
|
476 |
// |
|
477 |
// This routine is used with -Xcheck:jni to verify that native |
|
478 |
// JNI code does not return to Java code without restoring the |
|
479 |
// MXCSR register to our expected state. |
|
480 |
||
481 |
||
482 |
// Generates the verify_mxcsr stub.
//
// When CheckJNICalls is set and SSE is in use, the stub compares the masked
// MXCSR value against the standard one; on mismatch it emits a warning and
// reloads the standard value. Otherwise the stub is a bare return.
// Returns the address of the first instruction of the stub.
address generate_verify_mxcsr() {
  StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
  address stub_start = __ pc();

  const bool check_enabled = CheckJNICalls && UseSSE > 0;
  if (check_enabled) {
    // Temporary slot at the top of the stack, valid once allocated below.
    const Address scratch(rsp, 0);
    ExternalAddress std_mxcsr(StubRoutines::addr_mxcsr_std());
    Label mxcsr_ok;

    __ push(rax);
    __ subptr(rsp, wordSize);  // allocate a temp location
    __ stmxcsr(scratch);
    __ movl(rax, scratch);
    __ andl(rax, MXCSR_MASK);
    __ cmp32(rax, std_mxcsr);
    __ jcc(Assembler::equal, mxcsr_ok);

    // Mismatch: warn, then reload the standard value.
    __ warn("MXCSR changed by native JNI code.");
    __ ldmxcsr(std_mxcsr);

    __ bind(mxcsr_ok);
    __ addptr(rsp, wordSize);  // release the temp location
    __ pop(rax);
  }

  __ ret(0);

  return stub_start;
}
|
512 |
||
513 |
||
514 |
//--------------------------------------------------------------------------- |
|
515 |
// Support for void verify_fpu_cntrl_wrd() |
|
516 |
// |
|
517 |
// This routine is used with -Xcheck:jni to verify that native |
|
518 |
// JNI code does not return to Java code without restoring the |
|
519 |
// FP control word to our expected state. |
|
520 |
||
521 |
// Generates the FP control word verification stub (registered under the name
// "verify_spcw").
//
// When CheckJNICalls is set, the stub compares the masked FPU control word
// against the standard one; on mismatch it emits a warning and reloads the
// standard value. Otherwise the stub is a bare return.
// Returns the address of the first instruction of the stub.
address generate_verify_fpu_cntrl_wrd() {
  StubCodeMark mark(this, "StubRoutines", "verify_spcw");
  address stub_start = __ pc();

  if (CheckJNICalls) {
    // Temporary slot at the top of the stack, valid once allocated below.
    const Address scratch(rsp, 0);
    Label cw_ok;

    __ push(rax);
    __ subptr(rsp, wordSize);  // allocate a temp location
    __ fnstcw(scratch);
    __ movl(rax, scratch);
    __ andl(rax, FPU_CNTRL_WRD_MASK);
    ExternalAddress std_cw(StubRoutines::addr_fpu_cntrl_wrd_std());
    __ cmp32(rax, std_cw);
    __ jcc(Assembler::equal, cw_ok);

    // Mismatch: warn, then reload the standard control word.
    __ warn("Floating point control word changed by native JNI code.");
    __ fldcw(std_cw);

    __ bind(cw_ok);
    __ addptr(rsp, wordSize);  // release the temp location
    __ pop(rax);
  }

  __ ret(0);

  return stub_start;
}
|
551 |
||
552 |
//--------------------------------------------------------------------------- |
|
553 |
// Wrapper for slow-case handling of double-to-integer conversion |
|
554 |
// d2i or f2i fast case failed either because it is nan or because |
|
555 |
// of under/overflow. |
|
556 |
// Input: FPU TOS: float value |
|
557 |
// Output: rax, (rdx): integer (long) result |
|
558 |
||
559 |
// Generates a wrapper around the slow-case C conversion routine fcn.
//
// t   - T_INT or T_LONG; only selects the block comment that is emitted.
// fcn - address of the C routine, called via call_VM_leaf with 2 argument
//       words.
// Input:  value on the FPU top of stack.
// Output: result of fcn as left in rax (rdx) by the call; rbx, rcx, rsi, rdi,
//         rbp and the FPU state are saved before and restored after the call.
// Returns the address of the first instruction of the stub.
address generate_d2i_wrapper(BasicType t, address fcn) {
  StubCodeMark mark(this, "StubRoutines", "d2i_wrapper");
  address stub_start = __ pc();

  // Word offsets from rsp once everything below has been pushed.
  enum layout {
    FPUState_off         = 0,
    rbp_off              = FPUStateSizeInWords,
    rdi_off,
    rsi_off,
    rcx_off,
    rbx_off,
    saved_argument_off,
    saved_argument_off2,  // 2nd half of double
    framesize
  };

  assert(FPUStateSizeInWords == 27, "update stack layout");

  // Spill the incoming value to the stack so it survives push_FPU_state().
  __ subptr(rsp, wordSize * 2);
  __ fstp_d(Address(rsp, 0));

  // Save CPU & FPU state.
  __ push(rbx);
  __ push(rcx);
  __ push(rsi);
  __ push(rdi);
  __ push(rbp);
  __ push_FPU_state();

  // push_FPU_state() resets the FP top of stack, so reload the original
  // double from its spill slot ...
  __ fld_d(Address(rsp, saved_argument_off * wordSize));
  // ... and store it again as the outgoing argument.
  __ subptr(rsp, wordSize*2);
  __ fst_d(Address(rsp, 0));

  // Prepare the FPU for doing math in C-land.
  __ empty_FPU_stack();

  // Call the C code to massage the double. Result in EAX.
  switch (t) {
    case T_INT:
      BLOCK_COMMENT("SharedRuntime::d2i");
      break;
    case T_LONG:
      BLOCK_COMMENT("SharedRuntime::d2l");
      break;
    default:
      break;
  }
  __ call_VM_leaf( fcn, 2 );

  // Restore CPU & FPU state in reverse order, then drop the spill slot.
  __ pop_FPU_state();
  __ pop(rbp);
  __ pop(rdi);
  __ pop(rsi);
  __ pop(rcx);
  __ pop(rbx);
  __ addptr(rsp, wordSize * 2);

  __ ret(0);

  return stub_start;
}
|
618 |
||
619 |
||
620 |
//--------------------------------------------------------------------------- |
|
621 |
// The following routine generates a subroutine to throw an asynchronous |
|
622 |
// UnknownError when an unsafe access gets a fault that could not be |
|
623 |
// reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.) |
|
624 |
// Generates the handler_for_unsafe_access stub.
//
// The stub reserves a stack word, saves all registers, calls
// handle_unsafe_access(), writes the address it returns into the reserved
// word, restores the registers and returns through that word.
// Returns the address of the first instruction of the stub.
address generate_handler_for_unsafe_access() {
  StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
  address stub_start = __ pc();

  // The reserved word sits just above the registers saved by pusha().
  const Address resume_slot(rsp, RegisterImpl::number_of_registers * BytesPerWord);

  __ push(0);  // hole for return address-to-be
  __ pusha();  // push registers
  BLOCK_COMMENT("call handle_unsafe_access");
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access)));
  __ movptr(resume_slot, rax);  // stuff next address
  __ popa();
  __ ret(0);  // jump to next address

  return stub_start;
}
|
639 |
||
640 |
||
641 |
//---------------------------------------------------------------------------------------------------- |
|
642 |
// Non-destructive plausibility checks for oops |
|
643 |
||
644 |
// Generates the verify_oop stub: non-destructive plausibility checks for oops.
//
// A NULL object passes. Otherwise the object's address bits must match
// Universe::verify_oop_bits() under Universe::verify_oop_mask(), and its klass
// word must be non-zero. On failure MacroAssembler::debug32 is called. Both
// exits pop three argument words.
// Returns the address of the first instruction of the stub.
address generate_verify_oop() {
  StubCodeMark mark(this, "StubRoutines", "verify_oop");
  address stub_start = __ pc();

  // Stack layout once EFLAGS and rdx have been pushed below:
  //
  //   [tos    ]: saved rdx
  //   [tos + 1]: saved EFLAGS
  //   [tos + 2]: return address
  //   [tos + 3]: char* error message
  //   [tos + 4]: oop object to verify
  //   [tos + 5]: saved rax - saved by caller and bashed
  const Address object_slot   (rsp, 4 * wordSize);
  const Address saved_rax_slot(rsp, 5 * wordSize);

  Label exit, error;
  __ pushf();
  __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
  __ push(rdx);  // save rdx

  // Make sure the object is 'reasonable'.
  __ movptr(rax, object_slot);    // get object
  __ testptr(rax, rax);
  __ jcc(Assembler::zero, exit);  // if obj is NULL it is ok

  // Check if the oop is in the right area of memory.
  const int oop_mask = Universe::verify_oop_mask();
  const int oop_bits = Universe::verify_oop_bits();
  __ mov(rdx, rax);
  __ andptr(rdx, oop_mask);
  __ cmpptr(rdx, oop_bits);
  __ jcc(Assembler::notZero, error);

  // Make sure the klass is 'reasonable', which is not zero.
  __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes()));  // get klass
  __ testptr(rax, rax);
  __ jcc(Assembler::zero, error);  // if klass is NULL it is broken

  // Success: restore the caller's state and return.
  __ bind(exit);
  __ movptr(rax, saved_rax_slot);  // get saved rax back
  __ pop(rdx);                     // restore rdx
  __ popf();                       // restore EFLAGS
  __ ret(3 * wordSize);            // pop arguments

  // Failure: restore the caller's state, then report via debug32.
  __ bind(error);
  __ movptr(rax, saved_rax_slot);  // get saved rax back
  __ pop(rdx);                     // get saved rdx back
  __ popf();                       // get saved EFLAGS off stack -- will be ignored
  __ pusha();                      // push registers (eip = return address & msg are already pushed)
  BLOCK_COMMENT("call MacroAssembler::debug");
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  __ popa();
  __ ret(3 * wordSize);            // pop arguments
  return stub_start;
}
|
698 |
||
699 |
// |
|
700 |
// Generate pre-barrier for array stores |
|
701 |
// |
|
702 |
// Input: |
|
703 |
// start - starting address |
|
3262
30d1c247fc25
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
2534
diff
changeset
|
704 |
// count - element count |
8498 | 705 |
void gen_write_ref_array_pre_barrier(Register start, Register count, bool uninitialized_target) {
  // Emits the GC pre-barrier for a reference-array store of 'count' elements
  // beginning at 'start'. Only the G1 barrier kinds emit any code; the
  // card-table and ModRef kinds need nothing before the store.
  assert_different_registers(start, count);
  BarrierSet* const barrier_set = Universe::heap()->barrier_set();
  switch (barrier_set->kind()) {
    case BarrierSet::G1SATBCT:
    case BarrierSet::G1SATBCTLogging:
      // With G1, no call is generated when it is statically known that the
      // target is uninitialized.
      if (uninitialized_target) {
        break;
      }
      // All registers are saved around the leaf call and restored afterwards.
      __ pusha();
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
                      start, count);
      __ popa();
      break;

    case BarrierSet::CardTableModRef:
    case BarrierSet::CardTableExtension:
    case BarrierSet::ModRef:
      // No pre-barrier code for these barrier sets.
      break;

    default:
      ShouldNotReachHere();
  }
}
|
728 |
||
729 |
||
730 |
// |
|
731 |
// Generate a post-barrier for an array store |
|
732 |
// |
|
733 |
// start - starting address |
|
734 |
// count - element count |
|
735 |
// |
|
736 |
// The two input registers are overwritten. |
|
737 |
// |
|
738 |
void gen_write_ref_array_post_barrier(Register start, Register count) {
  // Emits the GC post-barrier for a reference-array store of 'count' elements
  // beginning at 'start'. Both input registers are overwritten on the
  // card-table path.
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert_different_registers(start, count);
  switch (bs->kind()) {
    case BarrierSet::G1SATBCT:
    case BarrierSet::G1SATBCTLogging: {
      // Delegate to the runtime; every register is preserved around the call.
      __ pusha();
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post),
                      start, count);
      __ popa();
      break;
    }

    case BarrierSet::CardTableModRef:
    case BarrierSet::CardTableExtension: {
      CardTableModRefBS* ct = (CardTableModRefBS*)bs;
      assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

      Label L_loop;
      const Register end = count;  // shares the register with 'count'
      assert_different_registers(start, end);

      // Card byte operand: byte_map_base + start + count. The card-table base
      // is folded into the displacement as a plain integer.
      const intptr_t disp = (intptr_t) ct->byte_map_base;
      const Address cardtable(start, count, Address::times_1, disp);

      // end = address of the last element (start + count*wordSize - wordSize)
      __ lea(end, Address(start, count, Address::times_ptr, -wordSize));
      // Turn both addresses into card indices.
      __ shrptr(start, CardTableModRefBS::card_shift);
      __ shrptr(end,   CardTableModRefBS::card_shift);
      // end becomes (last card - first card), used as a down-counting index.
      __ subptr(end, start);
      __ BIND(L_loop);
      // Store 0 into each card byte, from the last card down to the first.
      __ movb(cardtable, 0);
      __ decrement(count);
      __ jcc(Assembler::greaterEqual, L_loop);
      break;
    }

    case BarrierSet::ModRef:
      // Nothing to emit.
      break;

    default:
      ShouldNotReachHere();
  }
}
|
781 |
||
1437 | 782 |
|
783 |
// Copy 64 bytes chunks |
|
784 |
// |
|
785 |
// Inputs: |
|
786 |
// from - source array address |
|
787 |
// to_from - destination array address - from |
|
788 |
// qword_count - 8-bytes element count (non-negative; counted down to zero) |
|
789 |
// |
|
790 |
void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
  // Emits a forward copy of 'qword_count' 8-byte words using XMM registers.
  //
  //   from        - source address; advanced past the copied data
  //   to_from     - (destination - source), so 'from + to_from' addresses the
  //                 destination word that corresponds to 'from'
  //   qword_count - number of 8-byte words to copy; it is counted DOWN
  //                 (subl 8 / jcc greaterEqual), so it must be non-negative,
  //                 and it is zero when the emitted code falls out at L_exit
  //
  // The main loop moves 64 bytes per iteration; any remainder is copied one
  // qword at a time.
  assert( UseSSE >= 2, "supported cpu only" );
  Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;

  // True when the main loop uses the vmovdqu (offsets 0 and 32) variant.
  const bool use_avx2_moves = UseUnalignedLoadStores && (UseAVX >= 2);

  // Copy 64-byte chunks; enter at the loop test.
  __ jmpb(L_copy_64_bytes);
  __ align(OptoLoopAlignment);
  __ BIND(L_copy_64_bytes_loop);

  if (use_avx2_moves) {
    // Two moves at offsets 0 and 32.
    __ vmovdqu(xmm0, Address(from, 0));
    __ vmovdqu(Address(from, to_from, Address::times_1,  0), xmm0);
    __ vmovdqu(xmm1, Address(from, 32));
    __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
  } else if (UseUnalignedLoadStores) {
    // Four unaligned 16-byte moves.
    __ movdqu(xmm0, Address(from, 0));
    __ movdqu(Address(from, to_from, Address::times_1,  0), xmm0);
    __ movdqu(xmm1, Address(from, 16));
    __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
    __ movdqu(xmm2, Address(from, 32));
    __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
    __ movdqu(xmm3, Address(from, 48));
    __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
  } else {
    // Eight 8-byte moves.
    __ movq(xmm0, Address(from, 0));
    __ movq(Address(from, to_from, Address::times_1,  0), xmm0);
    __ movq(xmm1, Address(from, 8));
    __ movq(Address(from, to_from, Address::times_1,  8), xmm1);
    __ movq(xmm2, Address(from, 16));
    __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
    __ movq(xmm3, Address(from, 24));
    __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
    __ movq(xmm4, Address(from, 32));
    __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
    __ movq(xmm5, Address(from, 40));
    __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
    __ movq(xmm6, Address(from, 48));
    __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
    __ movq(xmm7, Address(from, 56));
    __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
  }

  // 'from' is an address: use the pointer-sized add, as mmx_copy_forward does.
  __ addptr(from, 64);
  __ BIND(L_copy_64_bytes);
  __ subl(qword_count, 8);
  __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);

  if (use_avx2_moves) {
    // clean upper bits of YMM registers
    __ vzeroupper();
  }
  // Undo the last subtraction; what is left is the count of trailing qwords.
  __ addl(qword_count, 8);
  __ jccb(Assembler::zero, L_exit);
  //
  // length is too short, just copy qwords
  //
  __ BIND(L_copy_8_bytes);
  __ movq(xmm0, Address(from, 0));
  __ movq(Address(from, to_from, Address::times_1), xmm0);
  __ addptr(from, 8);
  __ decrement(qword_count);
  __ jcc(Assembler::greater, L_copy_8_bytes);
  __ BIND(L_exit);
}
|
855 |
||
1 | 856 |
// Copy 64 bytes chunks |
857 |
// |
|
858 |
// Inputs: |
|
859 |
// from - source array address |
|
860 |
// to_from - destination array address - from |
|
861 |
// qword_count - 8-bytes element count (non-negative; counted down to zero) |
|
862 |
// |
|
863 |
void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
  // Emits a forward copy of 'qword_count' 8-byte words through the MMX
  // registers mmx0..mmx7.
  //
  //   from        - source address; advanced past the copied data
  //   to_from     - (destination - source); 'from + to_from' is the destination
  //   qword_count - number of 8-byte words; counted down, so non-negative
  //
  // emms is emitted once at the very end, after the MMX moves.
  assert( VM_Version::supports_mmx(), "supported cpu only" );
  Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;

  const Address::ScaleFactor x1 = Address::times_1;

  // Copy 64-byte chunks; enter at the loop test.
  __ jmpb(L_copy_64_bytes);
  __ align(OptoLoopAlignment);
  __ BIND(L_copy_64_bytes_loop);
  // Loads and stores are interleaved: each store trails its load by a few
  // instructions. The order below is the emitted order.
  __ movq(mmx0, Address(from, 0));
  __ movq(mmx1, Address(from, 8));
  __ movq(mmx2, Address(from, 16));
  __ movq(Address(from, to_from, x1, 0), mmx0);
  __ movq(mmx3, Address(from, 24));
  __ movq(Address(from, to_from, x1, 8), mmx1);
  __ movq(mmx4, Address(from, 32));
  __ movq(Address(from, to_from, x1, 16), mmx2);
  __ movq(mmx5, Address(from, 40));
  __ movq(Address(from, to_from, x1, 24), mmx3);
  __ movq(mmx6, Address(from, 48));
  __ movq(Address(from, to_from, x1, 32), mmx4);
  __ movq(mmx7, Address(from, 56));
  __ movq(Address(from, to_from, x1, 40), mmx5);
  __ movq(Address(from, to_from, x1, 48), mmx6);
  __ movq(Address(from, to_from, x1, 56), mmx7);
  __ addptr(from, 64);
  __ BIND(L_copy_64_bytes);
  __ subl(qword_count, 8);
  __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
  // Restore the remainder (0..7 qwords) after the final over-subtraction.
  __ addl(qword_count, 8);
  __ jccb(Assembler::zero, L_exit);
  //
  // length is too short, just copy qwords
  //
  __ BIND(L_copy_8_bytes);
  __ movq(mmx0, Address(from, 0));
  __ movq(Address(from, to_from, Address::times_1), mmx0);
  __ addptr(from, 8);
  __ decrement(qword_count);
  __ jcc(Assembler::greater, L_copy_8_bytes);
  __ BIND(L_exit);
  __ emms();
}
|
904 |
||
905 |
address generate_disjoint_copy(BasicType t, bool aligned,
                               Address::ScaleFactor sf,
                               address* entry, const char *name,
                               bool dest_uninitialized = false) {
  // Generates a forward (low-to-high) array copy stub for element type 't'
  // with element scale 'sf' and returns the stub's start address.
  //
  //   aligned            - when true the source pre-alignment code is omitted
  //   entry              - if non-NULL, receives the pc just after the argument
  //                        loads (secondary entry used by the conjoint stub)
  //   dest_uninitialized - forwarded to the T_OBJECT pre-barrier
  //
  // The stub reads (from, to, count) from the stack and returns 0 in rax.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
  Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes;

  // Generation-time facts about the element type.
  const bool is_oop     = (t == T_OBJECT);
  const bool is_byte    = (t == T_BYTE);
  const bool is_subword = is_byte || (t == T_SHORT);

  // log2 of the number of elements per pointer-sized word.
  const int shift = Address::times_ptr - sf;
  const int elems_in_4_bytes = 1 << shift;
  const int elems_in_8_bytes = 2 << shift;

  const Register from     = rsi;  // source array address
  const Register to       = rdi;  // destination array address
  const Register count    = rcx;  // elements count
  const Register to_from  = to;   // (to - from), shares the register with 'to'
  const Register saved_to = rdx;  // saved destination array address

  // Operands for "the element at 'from'" and its destination counterpart.
  const Address src_elem(from, 0);
  const Address dst_elem(from, to_from, Address::times_1, 0);

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);
  // The three arguments sit at rsp + 12 + {4, 8, 12} after the pushes above.
  __ movptr(from , Address(rsp, 12+ 4));
  __ movptr(to   , Address(rsp, 12+ 8));
  __ movl(count, Address(rsp, 12+ 12));

  if (entry != NULL) {
    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  if (is_oop) {
    // Nothing to do (and no barriers) for an empty oop array.
    __ testl(count, count);
    __ jcc(Assembler::zero, L_0_count);
    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
    __ mov(saved_to, to);          // keep 'to' for the post-barrier
  }

  __ subptr(to, from); // to --> to_from
  __ cmpl(count, elems_in_8_bytes); // Short arrays (< 8 bytes) copy by element
  __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp

  if (!UseUnalignedLoadStores && !aligned && is_subword) {
    // Bring the source address to a 4-byte boundary.
    if (is_byte) {
      // One byte misalignment happens only for byte arrays
      __ testl(from, 1);
      __ jccb(Assembler::zero, L_skip_align1);
      __ movb(rax, src_elem);
      __ movb(dst_elem, rax);
      __ increment(from);
      __ decrement(count);
      __ BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    __ testl(from, 2);
    __ jccb(Assembler::zero, L_skip_align2);
    __ movw(rax, src_elem);
    __ movw(dst_elem, rax);
    __ addptr(from, 2);
    __ subl(count, 1<<(shift-1));
    __ BIND(L_skip_align2);
  }

  if (!VM_Version::supports_mmx()) {
    // No MMX: bulk copy with rep_mov.
    __ mov(rax, count);      // save 'count'
    __ shrl(count, shift);   // elements -> 4-byte units
    __ addptr(to_from, from);// restore 'to'
    __ rep_mov();
    __ subptr(to_from, from);// restore 'to_from'
    __ mov(count, rax);      // restore 'count'
    __ jmpb(L_copy_2_bytes); // all dwords were copied
  } else {
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      __ testptr(from,4);
      __ jccb(Assembler::zero, L_copy_64_bytes);
      __ movl(rax, src_elem);
      __ movl(dst_elem, rax);
      __ addptr(from, 4);
      __ subl(count, elems_in_4_bytes);
    }
    __ BIND(L_copy_64_bytes);
    __ mov(rax, count);
    __ shrl(rax, shift+1);  // 8 bytes chunk count
    //
    // Copy 8-byte chunks through XMM or MMX registers; the helpers move
    // 64 bytes per loop iteration and consume rax.
    //
    if (UseXMMForArrayCopy) {
      xmm_copy_forward(from, to_from, rax);
    } else {
      mmx_copy_forward(from, to_from, rax);
    }
  }

  // copy trailing dword
  __ BIND(L_copy_4_bytes);
  __ testl(count, elems_in_4_bytes);
  __ jccb(Assembler::zero, L_copy_2_bytes);
  __ movl(rax, src_elem);
  __ movl(dst_elem, rax);
  if (is_subword) {
    __ addptr(from, 4);
    __ BIND(L_copy_2_bytes);
    // copy trailing word
    __ testl(count, 1<<(shift-1));
    __ jccb(Assembler::zero, L_copy_byte);
    __ movw(rax, src_elem);
    __ movw(dst_elem, rax);
    if (is_byte) {
      __ addptr(from, 2);
      __ BIND(L_copy_byte);
      // copy trailing byte
      __ testl(count, 1);
      __ jccb(Assembler::zero, L_exit);
      __ movb(rax, src_elem);
      __ movb(dst_elem, rax);
      __ BIND(L_exit);
    } else {
      // Shorts have no byte tail; the label still has to be bound.
      __ BIND(L_copy_byte);
    }
  } else {
    // 4-byte elements have no sub-dword tail; the label still has to be bound.
    __ BIND(L_copy_2_bytes);
  }

  if (is_oop) {
    __ movl(count, Address(rsp, 12+12)); // reread 'count'
    __ mov(to, saved_to); // restore 'to'
    gen_write_ref_array_post_barrier(to, count);
    __ BIND(L_0_count);
  }
  inc_copy_counter_np(t);
  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1041 |
||
1042 |
||
6433 | 1043 |
address generate_fill(BasicType t, bool aligned, const char *name) {
  // Generates an array-fill stub for element type 't' and returns its start
  // address. The stub loads (to, value, count) from the stack and hands them
  // to MacroAssembler::generate_fill with rax and xmm0 as the extra registers.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  BLOCK_COMMENT("Entry:");

  const Register dest       = rdi;  // destination array address
  const Register fill_value = rdx;  // value to store
  const Register elem_count = rsi;  // elements count

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // rsi and rdi are saved here and restored in reverse order below.
  __ push(rsi);
  __ push(rdi);
  // The three arguments sit at rsp + 12 + {4, 8, 12} after the pushes above.
  __ movptr(dest, Address(rsp, 12+ 4));
  __ movl(fill_value, Address(rsp, 12+ 8));
  __ movl(elem_count, Address(rsp, 12+ 12));

  __ generate_fill(t, aligned, dest, fill_value, elem_count, rax, xmm0);

  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);
  return start;
}
|
1069 |
||
1 | 1070 |
address generate_conjoint_copy(BasicType t, bool aligned,
                               Address::ScaleFactor sf,
                               address nooverlap_target,
                               address* entry, const char *name,
                               bool dest_uninitialized = false) {
  // Generates an array copy stub that tolerates overlapping source and
  // destination, and returns the stub's start address.
  //
  // The emitted code first tests for overlap and jumps to 'nooverlap_target'
  // when dst <= src or dst >= src + count*elem_size. Otherwise it copies from
  // high addresses to low.
  //
  //   entry              - if non-NULL, receives the pc just after the argument
  //                        loads (secondary entry used by the generic stub)
  //   dest_uninitialized - forwarded to the T_OBJECT pre-barrier
  //
  // The stub reads (from, to, count) from the stack and returns 0 in rax.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
  Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop;

  // Generation-time facts about the element type.
  const bool is_oop     = (t == T_OBJECT);
  const bool is_byte    = (t == T_BYTE);
  const bool is_subword = is_byte || (t == T_SHORT);

  // log2 of the number of elements per pointer-sized word.
  const int shift = Address::times_ptr - sf;
  const int elems_in_4_bytes = 1 << shift;
  const int elems_in_8_bytes = 2 << shift;

  const Register src   = rax;  // source array address
  const Register dst   = rdx;  // destination array address
  const Register from  = rsi;  // source array address
  const Register to    = rdi;  // destination array address
  const Register count = rcx;  // elements count
  const Register end   = rax;  // array end address (shares rax with 'src')

  // Operands addressing element index 'count' of each array.
  const Address from_at_count(from, count, sf, 0);
  const Address to_at_count(to, count, sf, 0);

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);
  // The three arguments sit at rsp + 12 + {4, 8, 12} after the pushes above.
  __ movptr(src  , Address(rsp, 12+ 4));   // from
  __ movptr(dst  , Address(rsp, 12+ 8));   // to
  __ movl2ptr(count, Address(rsp, 12+12)); // count

  if (entry != NULL) {
    *entry = __ pc(); // Entry point from generic arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  // nooverlap_target expects arguments in rsi and rdi.
  __ mov(from, src);
  __ mov(to  , dst);

  // arrays overlap test: dispatch to disjoint stub if necessary.
  RuntimeAddress nooverlap(nooverlap_target);
  __ cmpptr(dst, src);
  __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
  __ jump_cc(Assembler::belowEqual, nooverlap);
  __ cmpptr(dst, end);
  __ jump_cc(Assembler::aboveEqual, nooverlap);

  if (is_oop) {
    // Nothing to do (and no barriers) for an empty oop array.
    __ testl(count, count);
    __ jcc(Assembler::zero, L_0_count);
    gen_write_ref_array_pre_barrier(dst, count, dest_uninitialized);
  }

  // copy from high to low
  __ cmpl(count, elems_in_8_bytes); // Short arrays (< 8 bytes) copy by element
  __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
  if (is_subword) {
    // Align the end of destination array at 4 bytes address boundary
    __ lea(end, Address(dst, count, sf, 0));
    if (is_byte) {
      // One byte misalignment happens only for byte arrays
      __ testl(end, 1);
      __ jccb(Assembler::zero, L_skip_align1);
      __ decrement(count);
      __ movb(rdx, from_at_count);
      __ movb(to_at_count, rdx);
      __ BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    __ testl(end, 2);
    __ jccb(Assembler::zero, L_skip_align2);
    __ subptr(count, 1<<(shift-1));
    __ movw(rdx, from_at_count);
    __ movw(to_at_count, rdx);
    __ BIND(L_skip_align2);
    __ cmpl(count, elems_in_8_bytes); // Short arrays (< 8 bytes) copy by element
    __ jcc(Assembler::below, L_copy_4_bytes);
  }

  if (!VM_Version::supports_mmx()) {
    // No MMX: bulk copy with rep_mov between std() and cld().
    __ std();
    __ mov(rax, count); // Save 'count'
    __ mov(rdx, to);    // Save 'to'
    __ lea(rsi, Address(from, count, sf, -4));
    __ lea(rdi, Address(to  , count, sf, -4));
    __ shrptr(count, shift); // elements -> 4-byte units
    __ rep_mov();
    __ cld();
    __ mov(count, rax); // restore 'count'
    __ andl(count, (1<<shift)-1);      // mask the number of rest elements
    __ movptr(from, Address(rsp, 12+4)); // reread 'from'
    __ mov(to, rdx);   // restore 'to'
    __ jmpb(L_copy_2_bytes); // all dword were copied
  } else {
    // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
    __ testptr(end, 4);
    __ jccb(Assembler::zero, L_copy_8_bytes);
    __ subl(count, elems_in_4_bytes);
    __ movl(rdx, from_at_count);
    __ movl(to_at_count, rdx);
    __ jmpb(L_copy_8_bytes);

    __ align(OptoLoopAlignment);
    // Move 8 bytes
    __ BIND(L_copy_8_bytes_loop);
    if (UseXMMForArrayCopy) {
      __ movq(xmm0, from_at_count);
      __ movq(to_at_count, xmm0);
    } else {
      __ movq(mmx0, from_at_count);
      __ movq(to_at_count, mmx0);
    }
    __ BIND(L_copy_8_bytes);
    __ subl(count, elems_in_8_bytes);
    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
    // Undo the final over-subtraction; count is now the sub-qword remainder.
    __ addl(count, elems_in_8_bytes);
    if (!UseXMMForArrayCopy) {
      __ emms();
    }
  }

  __ BIND(L_copy_4_bytes);
  // copy prefix dword
  __ testl(count, elems_in_4_bytes);
  __ jccb(Assembler::zero, L_copy_2_bytes);
  __ movl(rdx, Address(from, count, sf, -4));
  __ movl(Address(to, count, sf, -4), rdx);

  if (is_subword) {
    __ subl(count, elems_in_4_bytes);
    __ BIND(L_copy_2_bytes);
    // copy prefix word
    __ testl(count, 1<<(shift-1));
    __ jccb(Assembler::zero, L_copy_byte);
    __ movw(rdx, Address(from, count, sf, -2));
    __ movw(Address(to, count, sf, -2), rdx);
    if (is_byte) {
      __ subl(count, 1<<(shift-1));
      __ BIND(L_copy_byte);
      // copy prefix byte
      __ testl(count, 1);
      __ jccb(Assembler::zero, L_exit);
      __ movb(rdx, Address(from, 0));
      __ movb(Address(to, 0), rdx);
      __ BIND(L_exit);
    } else {
      // Shorts have no byte prefix; the label still has to be bound.
      __ BIND(L_copy_byte);
    }
  } else {
    // 4-byte elements have no sub-dword prefix; the label still has to be bound.
    __ BIND(L_copy_2_bytes);
  }

  if (is_oop) {
    __ movl2ptr(count, Address(rsp, 12+12)); // reread count
    gen_write_ref_array_post_barrier(to, count);
    __ BIND(L_0_count);
  }
  inc_copy_counter_np(t);
  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1231 |
||
1232 |
||
1233 |
address generate_disjoint_long_copy(address* entry, const char *name) {
  // Generates a forward copy stub for 8-byte (T_LONG) elements and returns
  // its start address.
  //
  //   entry - if non-NULL, receives the pc just after the argument loads
  //           (secondary entry used by the conjoint stub). NULL is tolerated,
  //           matching generate_disjoint_copy / generate_conjoint_copy.
  //
  // The stub reads (from, to, count) from the stack and returns 0 in rax.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_copy_8_bytes, L_copy_8_bytes_loop;
  const Register from       = rax;  // source array address
  const Register to         = rdx;  // destination array address
  const Register count      = rcx;  // elements count
  const Register to_from    = rdx;  // (to - from), shares the register with 'to'

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // No registers are pushed here, so the arguments start at rsp + 8.
  __ movptr(from , Address(rsp, 8+0));       // from
  __ movptr(to   , Address(rsp, 8+4));       // to
  __ movl2ptr(count, Address(rsp, 8+8));     // count

  if (entry != NULL) {
    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  __ subptr(to, from); // to --> to_from
  if (VM_Version::supports_mmx()) {
    // Each element is one qword, so 'count' is passed as the qword count.
    if (UseXMMForArrayCopy) {
      xmm_copy_forward(from, to_from, count);
    } else {
      mmx_copy_forward(from, to_from, count);
    }
  } else {
    // No MMX: move each 8-byte value with an fild_d / fistp_d pair.
    __ jmpb(L_copy_8_bytes);
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_8_bytes_loop);
    __ fild_d(Address(from, 0));
    __ fistp_d(Address(from, to_from, Address::times_1));
    __ addptr(from, 8);
    __ BIND(L_copy_8_bytes);
    __ decrement(count);
    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
  }
  inc_copy_counter_np(T_LONG);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1276 |
||
1277 |
address generate_conjoint_long_copy(address nooverlap_target,
                                    address* entry, const char *name) {
  // Generates a copy stub for 8-byte (T_LONG) elements that tolerates
  // overlapping arrays, and returns its start address.
  //
  // The emitted code jumps to 'nooverlap_target' when to <= from or
  // to >= from + count*8; otherwise it copies from the last element down to
  // the first.
  //
  //   entry - if non-NULL, receives the pc just after the argument loads
  //           (secondary entry used by the generic stub). NULL is tolerated,
  //           matching generate_disjoint_copy / generate_conjoint_copy.
  //
  // The stub reads (from, to, count) from the stack and returns 0 in rax.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_copy_8_bytes, L_copy_8_bytes_loop;
  const Register from       = rax;  // source array address
  const Register to         = rdx;  // destination array address
  const Register count      = rcx;  // elements count
  const Register end_from   = rax;  // source array end address (shares rax with 'from')

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // No registers are pushed here, so the arguments start at rsp + 8.
  __ movptr(from , Address(rsp, 8+0));       // from
  __ movptr(to   , Address(rsp, 8+4));       // to
  __ movl2ptr(count, Address(rsp, 8+8));     // count

  if (entry != NULL) {
    *entry = __ pc(); // Entry point from generic arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  // arrays overlap test
  __ cmpptr(to, from);
  RuntimeAddress nooverlap(nooverlap_target);
  __ jump_cc(Assembler::belowEqual, nooverlap);
  __ lea(end_from, Address(from, count, Address::times_8, 0));
  __ cmpptr(to, end_from);
  // end_from overwrote 'from' (same register): reload it from the stack
  // between the compare and the conditional jump that consumes its flags.
  __ movptr(from, Address(rsp, 8));  // from
  __ jump_cc(Assembler::aboveEqual, nooverlap);

  // Enter the loop at its test.
  __ jmpb(L_copy_8_bytes);

  __ align(OptoLoopAlignment);
  __ BIND(L_copy_8_bytes_loop);
  if (VM_Version::supports_mmx()) {
    if (UseXMMForArrayCopy) {
      __ movq(xmm0, Address(from, count, Address::times_8));
      __ movq(Address(to, count, Address::times_8), xmm0);
    } else {
      __ movq(mmx0, Address(from, count, Address::times_8));
      __ movq(Address(to, count, Address::times_8), mmx0);
    }
  } else {
    // No MMX: move the 8-byte value with an fild_d / fistp_d pair.
    __ fild_d(Address(from, count, Address::times_8));
    __ fistp_d(Address(to, count, Address::times_8));
  }
  __ BIND(L_copy_8_bytes);
  // 'count' is decremented before each copy, so it indexes count-1 .. 0.
  __ decrement(count);
  __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);

  // emms is needed only on the path that used the MMX register.
  if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
    __ emms();
  }
  inc_copy_counter_np(T_LONG);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1335 |
||
1336 |
||
1337 |
// Helper for generating a dynamic type check. |
|
1338 |
// The sub_klass must be one of {rbx, rdx, rsi}. |
|
1339 |
// The temp is killed. |
|
1340 |
void generate_type_check(Register sub_klass, |
|
1341 |
Address& super_check_offset_addr, |
|
1342 |
Address& super_klass_addr, |
|
1343 |
Register temp, |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1344 |
Label* L_success, Label* L_failure) { |
1 | 1345 |
BLOCK_COMMENT("type_check:"); |
1346 |
||
1347 |
Label L_fallthrough; |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1348 |
#define LOCAL_JCC(assembler_con, label_ptr) \ |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1349 |
if (label_ptr != NULL) __ jcc(assembler_con, *(label_ptr)); \ |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1350 |
else __ jcc(assembler_con, L_fallthrough) /*omit semi*/ |
1 | 1351 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1352 |
// The following is a strange variation of the fast path which requires |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1353 |
// one less register, because needed values are on the argument stack. |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1354 |
// __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp, |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1355 |
// L_success, L_failure, NULL); |
1 | 1356 |
assert_different_registers(sub_klass, temp); |
1357 |
||
11430
718fc06da49a
7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
stefank
parents:
10565
diff
changeset
|
1358 |
int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); |
1 | 1359 |
|
1360 |
// if the pointers are equal, we are done (e.g., String[] elements) |
|
1066 | 1361 |
__ cmpptr(sub_klass, super_klass_addr); |
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1362 |
LOCAL_JCC(Assembler::equal, L_success); |
1 | 1363 |
|
1364 |
// check the supertype display: |
|
1066 | 1365 |
__ movl2ptr(temp, super_check_offset_addr); |
1 | 1366 |
Address super_check_addr(sub_klass, temp, Address::times_1, 0); |
1066 | 1367 |
__ movptr(temp, super_check_addr); // load displayed supertype |
1368 |
__ cmpptr(temp, super_klass_addr); // test the super type |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1369 |
LOCAL_JCC(Assembler::equal, L_success); |
1 | 1370 |
|
1371 |
// if it was a primary super, we can just fail immediately |
|
1372 |
__ cmpl(super_check_offset_addr, sc_offset); |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1373 |
LOCAL_JCC(Assembler::notEqual, L_failure); |
1 | 1374 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1375 |
// The repne_scan instruction uses fixed registers, which will get spilled. |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1376 |
// We happen to know this works best when super_klass is in rax. |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1377 |
Register super_klass = temp; |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1378 |
__ movptr(super_klass, super_klass_addr); |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1379 |
__ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1380 |
L_success, L_failure); |
1 | 1381 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1382 |
__ bind(L_fallthrough); |
1 | 1383 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1384 |
if (L_success == NULL) { BLOCK_COMMENT("L_success:"); } |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1385 |
if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); } |
1 | 1386 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1387 |
#undef LOCAL_JCC |
1 | 1388 |
} |
1389 |
||
1390 |
// |
|
1391 |
// Generate checkcasting array copy stub |
|
1392 |
// |
|
1393 |
// Input: |
|
1394 |
// 4(rsp) - source array address |
|
1395 |
// 8(rsp) - destination array address |
|
1396 |
// 12(rsp) - element count, can be zero |
|
1397 |
// 16(rsp) - size_t ckoff (super_check_offset) |
|
1398 |
// 20(rsp) - oop ckval (super_klass) |
|
1399 |
// |
|
1400 |
// Output: |
|
1401 |
// rax, == 0 - success |
|
1402 |
// rax, == -1^K - failure, where K is partial transfer count |
|
1403 |
// |
|
8498 | 1404 |
address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) { |
1 | 1405 |
__ align(CodeEntryAlignment); |
1406 |
StubCodeMark mark(this, "StubRoutines", name); |
|
1407 |
address start = __ pc(); |
|
1408 |
||
1409 |
Label L_load_element, L_store_element, L_do_card_marks, L_done; |
|
1410 |
||
1411 |
// register use: |
|
1412 |
// rax, rdx, rcx -- loop control (end_from, end_to, count) |
|
1413 |
// rdi, rsi -- element access (oop, klass) |
|
1414 |
// rbx, -- temp |
|
1415 |
const Register from = rax; // source array address |
|
1416 |
const Register to = rdx; // destination array address |
|
1417 |
const Register length = rcx; // elements count |
|
1418 |
const Register elem = rdi; // each oop copied |
|
1419 |
const Register elem_klass = rsi; // each elem._klass (sub_klass) |
|
1420 |
const Register temp = rbx; // lone remaining temp |
|
1421 |
||
1422 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
1423 |
||
1066 | 1424 |
__ push(rsi); |
1425 |
__ push(rdi); |
|
1426 |
__ push(rbx); |
|
1 | 1427 |
|
1428 |
Address from_arg(rsp, 16+ 4); // from |
|
1429 |
Address to_arg(rsp, 16+ 8); // to |
|
1430 |
Address length_arg(rsp, 16+12); // elements count |
|
1431 |
Address ckoff_arg(rsp, 16+16); // super_check_offset |
|
1432 |
Address ckval_arg(rsp, 16+20); // super_klass |
|
1433 |
||
1434 |
// Load up: |
|
1066 | 1435 |
__ movptr(from, from_arg); |
1436 |
__ movptr(to, to_arg); |
|
1437 |
__ movl2ptr(length, length_arg); |
|
1 | 1438 |
|
8487
bf96596f06d2
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
8315
diff
changeset
|
1439 |
if (entry != NULL) { |
bf96596f06d2
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
8315
diff
changeset
|
1440 |
*entry = __ pc(); // Entry point from generic arraycopy stub. |
bf96596f06d2
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
8315
diff
changeset
|
1441 |
BLOCK_COMMENT("Entry:"); |
bf96596f06d2
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
8315
diff
changeset
|
1442 |
} |
1 | 1443 |
|
1444 |
//--------------------------------------------------------------- |
|
1445 |
// Assembler stub will be used for this call to arraycopy |
|
1446 |
// if the two arrays are subtypes of Object[] but the |
|
1447 |
// destination array type is not equal to or a supertype |
|
1448 |
// of the source type. Each element must be separately |
|
1449 |
// checked. |
|
1450 |
||
1451 |
// Loop-invariant addresses. They are exclusive end pointers. |
|
1066 | 1452 |
Address end_from_addr(from, length, Address::times_ptr, 0); |
1453 |
Address end_to_addr(to, length, Address::times_ptr, 0); |
|
1 | 1454 |
|
1455 |
Register end_from = from; // re-use |
|
1456 |
Register end_to = to; // re-use |
|
1457 |
Register count = length; // re-use |
|
1458 |
||
1459 |
// Loop-variant addresses. They assume post-incremented count < 0. |
|
1066 | 1460 |
Address from_element_addr(end_from, count, Address::times_ptr, 0); |
1461 |
Address to_element_addr(end_to, count, Address::times_ptr, 0); |
|
1 | 1462 |
Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes()); |
1463 |
||
1464 |
// Copy from low to high addresses, indexed from the end of each array. |
|
8498 | 1465 |
gen_write_ref_array_pre_barrier(to, count, dest_uninitialized); |
1066 | 1466 |
__ lea(end_from, end_from_addr); |
1467 |
__ lea(end_to, end_to_addr); |
|
1 | 1468 |
assert(length == count, ""); // else fix next line: |
1066 | 1469 |
__ negptr(count); // negate and test the length |
1 | 1470 |
__ jccb(Assembler::notZero, L_load_element); |
1471 |
||
1472 |
// Empty array: Nothing to do. |
|
1066 | 1473 |
__ xorptr(rax, rax); // return 0 on (trivial) success |
1 | 1474 |
__ jmp(L_done); |
1475 |
||
1476 |
// ======== begin loop ======== |
|
1477 |
// (Loop is rotated; its entry is L_load_element.) |
|
1478 |
// Loop control: |
|
1479 |
// for (count = -count; count != 0; count++) |
|
1480 |
// Base pointers src, dst are biased by 8*count,to last element. |
|
5249 | 1481 |
__ align(OptoLoopAlignment); |
1 | 1482 |
|
1483 |
__ BIND(L_store_element); |
|
1066 | 1484 |
__ movptr(to_element_addr, elem); // store the oop |
1 | 1485 |
__ increment(count); // increment the count toward zero |
1486 |
__ jccb(Assembler::zero, L_do_card_marks); |
|
1487 |
||
1488 |
// ======== loop entry is here ======== |
|
1489 |
__ BIND(L_load_element); |
|
1066 | 1490 |
__ movptr(elem, from_element_addr); // load the oop |
1491 |
__ testptr(elem, elem); |
|
1 | 1492 |
__ jccb(Assembler::zero, L_store_element); |
1493 |
||
1494 |
// (Could do a trick here: Remember last successful non-null |
|
1495 |
// element stored and make a quick oop equality check on it.) |
|
1496 |
||
1066 | 1497 |
__ movptr(elem_klass, elem_klass_addr); // query the object klass |
1 | 1498 |
generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp, |
1499 |
&L_store_element, NULL); |
|
17622
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1500 |
// (On fall-through, we have failed the element type check.) |
1 | 1501 |
// ======== end loop ======== |
1502 |
||
1503 |
// It was a real error; we must depend on the caller to finish the job. |
|
192
d66300cdf939
6614036: REGRESSION: Java server x86 VM intermittently crash with SIGSEGV (0xb)
rasbold
parents:
189
diff
changeset
|
1504 |
// Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops. |
d66300cdf939
6614036: REGRESSION: Java server x86 VM intermittently crash with SIGSEGV (0xb)
rasbold
parents:
189
diff
changeset
|
1505 |
// Emit GC store barriers for the oops we have copied (length_arg + count), |
1 | 1506 |
// and report their number to the caller. |
17622
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1507 |
assert_different_registers(to, count, rax); |
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1508 |
Label L_post_barrier; |
1 | 1509 |
__ addl(count, length_arg); // transfers = (length - remaining) |
1066 | 1510 |
__ movl2ptr(rax, count); // save the value |
17622
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1511 |
__ notptr(rax); // report (-1^K) to caller (does not affect flags) |
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1512 |
__ jccb(Assembler::notZero, L_post_barrier); |
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1513 |
__ jmp(L_done); // K == 0, nothing was copied, skip post barrier |
1 | 1514 |
|
1515 |
// Come here on success only. |
|
1516 |
__ BIND(L_do_card_marks); |
|
17622
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1517 |
__ xorptr(rax, rax); // return 0 on success |
1066 | 1518 |
__ movl2ptr(count, length_arg); |
17622
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1519 |
|
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1520 |
__ BIND(L_post_barrier); |
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1521 |
__ movptr(to, to_arg); // reload |
1 | 1522 |
gen_write_ref_array_post_barrier(to, count); |
1523 |
||
1524 |
// Common exit point (success or failure). |
|
1525 |
__ BIND(L_done); |
|
1066 | 1526 |
__ pop(rbx); |
1527 |
__ pop(rdi); |
|
1528 |
__ pop(rsi); |
|
1 | 1529 |
inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); |
1530 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
|
1531 |
__ ret(0); |
|
1532 |
||
1533 |
return start; |
|
1534 |
} |
|
1535 |
||
1536 |
// |
|
1537 |
// Generate 'unsafe' array copy stub |
|
1538 |
// Though just as safe as the other stubs, it takes an unscaled |
|
1539 |
// size_t argument instead of an element count. |
|
1540 |
// |
|
1541 |
// Input: |
|
1542 |
// 4(rsp) - source array address |
|
1543 |
// 8(rsp) - destination array address |
|
1544 |
// 12(rsp) - byte count, can be zero |
|
1545 |
// |
|
1546 |
// Output: |
|
1547 |
// rax, == 0 - success |
|
1548 |
// rax, == -1 - need to call System.arraycopy |
|
1549 |
// |
|
1550 |
// Examines the alignment of the operands and dispatches |
|
1551 |
// to a long, int, short, or byte copy loop. |
|
1552 |
// |
|
1553 |
address generate_unsafe_copy(const char *name, |
|
1554 |
address byte_copy_entry, |
|
1555 |
address short_copy_entry, |
|
1556 |
address int_copy_entry, |
|
1557 |
address long_copy_entry) { |
|
1558 |
||
1559 |
Label L_long_aligned, L_int_aligned, L_short_aligned; |
|
1560 |
||
1561 |
__ align(CodeEntryAlignment); |
|
1562 |
StubCodeMark mark(this, "StubRoutines", name); |
|
1563 |
address start = __ pc(); |
|
1564 |
||
1565 |
const Register from = rax; // source array address |
|
1566 |
const Register to = rdx; // destination array address |
|
1567 |
const Register count = rcx; // elements count |
|
1568 |
||
1569 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
1066 | 1570 |
__ push(rsi); |
1571 |
__ push(rdi); |
|
1 | 1572 |
Address from_arg(rsp, 12+ 4); // from |
1573 |
Address to_arg(rsp, 12+ 8); // to |
|
1574 |
Address count_arg(rsp, 12+12); // byte count |
|
1575 |
||
1576 |
// Load up: |
|
1066 | 1577 |
__ movptr(from , from_arg); |
1578 |
__ movptr(to , to_arg); |
|
1579 |
__ movl2ptr(count, count_arg); |
|
1 | 1580 |
|
1581 |
// bump this on entry, not on exit: |
|
1582 |
inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); |
|
1583 |
||
1584 |
const Register bits = rsi; |
|
1066 | 1585 |
__ mov(bits, from); |
1586 |
__ orptr(bits, to); |
|
1587 |
__ orptr(bits, count); |
|
1 | 1588 |
|
1589 |
__ testl(bits, BytesPerLong-1); |
|
1590 |
__ jccb(Assembler::zero, L_long_aligned); |
|
1591 |
||
1592 |
__ testl(bits, BytesPerInt-1); |
|
1593 |
__ jccb(Assembler::zero, L_int_aligned); |
|
1594 |
||
1595 |
__ testl(bits, BytesPerShort-1); |
|
1596 |
__ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); |
|
1597 |
||
1598 |
__ BIND(L_short_aligned); |
|
1066 | 1599 |
__ shrptr(count, LogBytesPerShort); // size => short_count |
1 | 1600 |
__ movl(count_arg, count); // update 'count' |
1601 |
__ jump(RuntimeAddress(short_copy_entry)); |
|
1602 |
||
1603 |
__ BIND(L_int_aligned); |
|
1066 | 1604 |
__ shrptr(count, LogBytesPerInt); // size => int_count |
1 | 1605 |
__ movl(count_arg, count); // update 'count' |
1606 |
__ jump(RuntimeAddress(int_copy_entry)); |
|
1607 |
||
1608 |
__ BIND(L_long_aligned); |
|
1066 | 1609 |
__ shrptr(count, LogBytesPerLong); // size => qword_count |
1 | 1610 |
__ movl(count_arg, count); // update 'count' |
1066 | 1611 |
__ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it. |
1612 |
__ pop(rsi); |
|
1 | 1613 |
__ jump(RuntimeAddress(long_copy_entry)); |
1614 |
||
1615 |
return start; |
|
1616 |
} |
|
1617 |
||
1618 |
||
1619 |
// Perform range checks on the proposed arraycopy. |
|
1620 |
// Smashes src_pos and dst_pos. (Uses them up for temps.) |
|
1621 |
void arraycopy_range_checks(Register src, |
|
1622 |
Register src_pos, |
|
1623 |
Register dst, |
|
1624 |
Register dst_pos, |
|
1625 |
Address& length, |
|
1626 |
Label& L_failed) { |
|
1627 |
BLOCK_COMMENT("arraycopy_range_checks:"); |
|
1628 |
const Register src_end = src_pos; // source array end position |
|
1629 |
const Register dst_end = dst_pos; // destination array end position |
|
1630 |
__ addl(src_end, length); // src_pos + length |
|
1631 |
__ addl(dst_end, length); // dst_pos + length |
|
1632 |
||
1633 |
// if (src_pos + length > arrayOop(src)->length() ) FAIL; |
|
1634 |
__ cmpl(src_end, Address(src, arrayOopDesc::length_offset_in_bytes())); |
|
1635 |
__ jcc(Assembler::above, L_failed); |
|
1636 |
||
1637 |
// if (dst_pos + length > arrayOop(dst)->length() ) FAIL; |
|
1638 |
__ cmpl(dst_end, Address(dst, arrayOopDesc::length_offset_in_bytes())); |
|
1639 |
__ jcc(Assembler::above, L_failed); |
|
1640 |
||
1641 |
BLOCK_COMMENT("arraycopy_range_checks done"); |
|
1642 |
} |
|
1643 |
||
1644 |
||
1645 |
// |
|
1646 |
// Generate generic array copy stubs |
|
1647 |
// |
|
1648 |
// Input: |
|
1649 |
// 4(rsp) - src oop |
|
1650 |
// 8(rsp) - src_pos |
|
1651 |
// 12(rsp) - dst oop |
|
1652 |
// 16(rsp) - dst_pos |
|
1653 |
// 20(rsp) - element count |
|
1654 |
// |
|
1655 |
// Output: |
|
1656 |
// rax, == 0 - success |
|
1657 |
// rax, == -1^K - failure, where K is partial transfer count |
|
1658 |
// |
|
1659 |
address generate_generic_copy(const char *name, |
|
1660 |
address entry_jbyte_arraycopy, |
|
1661 |
address entry_jshort_arraycopy, |
|
1662 |
address entry_jint_arraycopy, |
|
1663 |
address entry_oop_arraycopy, |
|
1664 |
address entry_jlong_arraycopy, |
|
1665 |
address entry_checkcast_arraycopy) { |
|
1666 |
Label L_failed, L_failed_0, L_objArray; |
|
1667 |
||
1668 |
{ int modulus = CodeEntryAlignment; |
|
1669 |
int target = modulus - 5; // 5 = sizeof jmp(L_failed) |
|
1670 |
int advance = target - (__ offset() % modulus); |
|
1671 |
if (advance < 0) advance += modulus; |
|
1672 |
if (advance > 0) __ nop(advance); |
|
1673 |
} |
|
1674 |
StubCodeMark mark(this, "StubRoutines", name); |
|
1675 |
||
1676 |
// Short-hop target to L_failed. Makes for denser prologue code. |
|
1677 |
__ BIND(L_failed_0); |
|
1678 |
__ jmp(L_failed); |
|
1679 |
assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed"); |
|
1680 |
||
1681 |
__ align(CodeEntryAlignment); |
|
1682 |
address start = __ pc(); |
|
1683 |
||
1684 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
1066 | 1685 |
__ push(rsi); |
1686 |
__ push(rdi); |
|
1 | 1687 |
|
1688 |
// bump this on entry, not on exit: |
|
1689 |
inc_counter_np(SharedRuntime::_generic_array_copy_ctr); |
|
1690 |
||
1691 |
// Input values |
|
1692 |
Address SRC (rsp, 12+ 4); |
|
1693 |
Address SRC_POS (rsp, 12+ 8); |
|
1694 |
Address DST (rsp, 12+12); |
|
1695 |
Address DST_POS (rsp, 12+16); |
|
1696 |
Address LENGTH (rsp, 12+20); |
|
1697 |
||
1698 |
//----------------------------------------------------------------------- |
|
1699 |
// Assembler stub will be used for this call to arraycopy |
|
1700 |
// if the following conditions are met: |
|
1701 |
// |
|
1702 |
// (1) src and dst must not be null. |
|
1703 |
// (2) src_pos must not be negative. |
|
1704 |
// (3) dst_pos must not be negative. |
|
1705 |
// (4) length must not be negative. |
|
1706 |
// (5) src klass and dst klass should be the same and not NULL. |
|
1707 |
// (6) src and dst should be arrays. |
|
1708 |
// (7) src_pos + length must not exceed length of src. |
|
1709 |
// (8) dst_pos + length must not exceed length of dst. |
|
1710 |
// |
|
1711 |
||
1712 |
const Register src = rax; // source array oop |
|
1713 |
const Register src_pos = rsi; |
|
1714 |
const Register dst = rdx; // destination array oop |
|
1715 |
const Register dst_pos = rdi; |
|
1716 |
const Register length = rcx; // transfer count |
|
1717 |
||
1718 |
// if (src == NULL) return -1; |
|
1066 | 1719 |
__ movptr(src, SRC); // src oop |
1720 |
__ testptr(src, src); |
|
1 | 1721 |
__ jccb(Assembler::zero, L_failed_0); |
1722 |
||
1723 |
// if (src_pos < 0) return -1; |
|
1066 | 1724 |
__ movl2ptr(src_pos, SRC_POS); // src_pos |
1 | 1725 |
__ testl(src_pos, src_pos); |
1726 |
__ jccb(Assembler::negative, L_failed_0); |
|
1727 |
||
1728 |
// if (dst == NULL) return -1; |
|
1066 | 1729 |
__ movptr(dst, DST); // dst oop |
1730 |
__ testptr(dst, dst); |
|
1 | 1731 |
__ jccb(Assembler::zero, L_failed_0); |
1732 |
||
1733 |
// if (dst_pos < 0) return -1; |
|
1066 | 1734 |
__ movl2ptr(dst_pos, DST_POS); // dst_pos |
1 | 1735 |
__ testl(dst_pos, dst_pos); |
1736 |
__ jccb(Assembler::negative, L_failed_0); |
|
1737 |
||
1738 |
// if (length < 0) return -1; |
|
1066 | 1739 |
__ movl2ptr(length, LENGTH); // length |
1 | 1740 |
__ testl(length, length); |
1741 |
__ jccb(Assembler::negative, L_failed_0); |
|
1742 |
||
1743 |
// if (src->klass() == NULL) return -1; |
|
1744 |
Address src_klass_addr(src, oopDesc::klass_offset_in_bytes()); |
|
1745 |
Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes()); |
|
1746 |
const Register rcx_src_klass = rcx; // array klass |
|
1066 | 1747 |
__ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes())); |
1 | 1748 |
|
1749 |
#ifdef ASSERT |
|
1750 |
// assert(src->klass() != NULL); |
|
1751 |
BLOCK_COMMENT("assert klasses not null"); |
|
1752 |
{ Label L1, L2; |
|
1066 | 1753 |
__ testptr(rcx_src_klass, rcx_src_klass); |
1 | 1754 |
__ jccb(Assembler::notZero, L2); // it is broken if klass is NULL |
1755 |
__ bind(L1); |
|
1756 |
__ stop("broken null klass"); |
|
1757 |
__ bind(L2); |
|
1066 | 1758 |
__ cmpptr(dst_klass_addr, (int32_t)NULL_WORD); |
1 | 1759 |
__ jccb(Assembler::equal, L1); // this would be broken also |
1760 |
BLOCK_COMMENT("assert done"); |
|
1761 |
} |
|
1762 |
#endif //ASSERT |
|
1763 |
||
1764 |
// Load layout helper (32-bits) |
|
1765 |
// |
|
1766 |
// |array_tag| | header_size | element_type | |log2_element_size| |
|
1767 |
// 32 30 24 16 8 2 0 |
|
1768 |
// |
|
1769 |
// array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 |
|
1770 |
// |
|
1771 |
||
11430
718fc06da49a
7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
stefank
parents:
10565
diff
changeset
|
1772 |
int lh_offset = in_bytes(Klass::layout_helper_offset()); |
1 | 1773 |
Address src_klass_lh_addr(rcx_src_klass, lh_offset); |
1774 |
||
1775 |
// Handle objArrays completely differently... |
|
1776 |
jint objArray_lh = Klass::array_layout_helper(T_OBJECT); |
|
1777 |
__ cmpl(src_klass_lh_addr, objArray_lh); |
|
1778 |
__ jcc(Assembler::equal, L_objArray); |
|
1779 |
||
1780 |
// if (src->klass() != dst->klass()) return -1; |
|
1066 | 1781 |
__ cmpptr(rcx_src_klass, dst_klass_addr); |
1 | 1782 |
__ jccb(Assembler::notEqual, L_failed_0); |
1783 |
||
1784 |
const Register rcx_lh = rcx; // layout helper |
|
1785 |
assert(rcx_lh == rcx_src_klass, "known alias"); |
|
1786 |
__ movl(rcx_lh, src_klass_lh_addr); |
|
1787 |
||
1788 |
// if (!src->is_Array()) return -1; |
|
1789 |
__ cmpl(rcx_lh, Klass::_lh_neutral_value); |
|
1790 |
__ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp |
|
1791 |
||
1792 |
// At this point, it is known to be a typeArray (array_tag 0x3). |
|
1793 |
#ifdef ASSERT |
|
1794 |
{ Label L; |
|
1795 |
__ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); |
|
1796 |
__ jcc(Assembler::greaterEqual, L); // signed cmp |
|
1797 |
__ stop("must be a primitive array"); |
|
1798 |
__ bind(L); |
|
1799 |
} |
|
1800 |
#endif |
|
1801 |
||
1802 |
assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh); |
|
1803 |
arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); |
|
1804 |
||
13952
e3cf184080bc
8000213: NPG: Should have renamed arrayKlass and typeArrayKlass
coleenp
parents:
13728
diff
changeset
|
1805 |
// TypeArrayKlass |
1 | 1806 |
// |
1807 |
// src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); |
|
1808 |
// dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); |
|
1809 |
// |
|
1810 |
const Register rsi_offset = rsi; // array offset |
|
1811 |
const Register src_array = src; // src array offset |
|
1812 |
const Register dst_array = dst; // dst array offset |
|
1813 |
const Register rdi_elsize = rdi; // log2 element size |
|
1814 |
||
1066 | 1815 |
__ mov(rsi_offset, rcx_lh); |
1816 |
__ shrptr(rsi_offset, Klass::_lh_header_size_shift); |
|
1817 |
__ andptr(rsi_offset, Klass::_lh_header_size_mask); // array_offset |
|
1818 |
__ addptr(src_array, rsi_offset); // src array offset |
|
1819 |
__ addptr(dst_array, rsi_offset); // dst array offset |
|
1820 |
__ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize |
|
1 | 1821 |
|
1822 |
// next registers should be set before the jump to corresponding stub |
|
1823 |
const Register from = src; // source array address |
|
1824 |
const Register to = dst; // destination array address |
|
1825 |
const Register count = rcx; // elements count |
|
1826 |
// some of them should be duplicated on stack |
|
1827 |
#define FROM Address(rsp, 12+ 4) |
|
1828 |
#define TO Address(rsp, 12+ 8) // Not used now |
|
1829 |
#define COUNT Address(rsp, 12+12) // Only for oop arraycopy |
|
1830 |
||
1831 |
BLOCK_COMMENT("scale indexes to element size"); |
|
1066 | 1832 |
__ movl2ptr(rsi, SRC_POS); // src_pos |
1833 |
__ shlptr(rsi); // src_pos << rcx (log2 elsize) |
|
1 | 1834 |
assert(src_array == from, ""); |
1066 | 1835 |
__ addptr(from, rsi); // from = src_array + SRC_POS << log2 elsize |
1836 |
__ movl2ptr(rdi, DST_POS); // dst_pos |
|
1837 |
__ shlptr(rdi); // dst_pos << rcx (log2 elsize) |
|
1 | 1838 |
assert(dst_array == to, ""); |
1066 | 1839 |
__ addptr(to, rdi); // to = dst_array + DST_POS << log2 elsize |
1840 |
__ movptr(FROM, from); // src_addr |
|
1841 |
__ mov(rdi_elsize, rcx_lh); // log2 elsize |
|
1842 |
__ movl2ptr(count, LENGTH); // elements count |
|
1 | 1843 |
|
1844 |
BLOCK_COMMENT("choose copy loop based on element size"); |
|
1845 |
__ cmpl(rdi_elsize, 0); |
|
1846 |
||
1847 |
__ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy)); |
|
1848 |
__ cmpl(rdi_elsize, LogBytesPerShort); |
|
1849 |
__ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy)); |
|
1850 |
__ cmpl(rdi_elsize, LogBytesPerInt); |
|
1851 |
__ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy)); |
|
1852 |
#ifdef ASSERT |
|
1853 |
__ cmpl(rdi_elsize, LogBytesPerLong); |
|
1854 |
__ jccb(Assembler::notEqual, L_failed); |
|
1855 |
#endif |
|
1066 | 1856 |
__ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it. |
1857 |
__ pop(rsi); |
|
1 | 1858 |
__ jump(RuntimeAddress(entry_jlong_arraycopy)); |
1859 |
||
1860 |
__ BIND(L_failed); |
|
1066 | 1861 |
__ xorptr(rax, rax); |
1862 |
__ notptr(rax); // return -1 |
|
1863 |
__ pop(rdi); |
|
1864 |
__ pop(rsi); |
|
1 | 1865 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
1866 |
__ ret(0); |
|
1867 |
||
13952
e3cf184080bc
8000213: NPG: Should have renamed arrayKlass and typeArrayKlass
coleenp
parents:
13728
diff
changeset
|
1868 |
// ObjArrayKlass |
1 | 1869 |
__ BIND(L_objArray); |
1870 |
// live at this point: rcx_src_klass, src[_pos], dst[_pos] |
|
1871 |
||
1872 |
Label L_plain_copy, L_checkcast_copy; |
|
1873 |
// test array classes for subtyping |
|
1066 | 1874 |
__ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality |
1 | 1875 |
__ jccb(Assembler::notEqual, L_checkcast_copy); |
1876 |
||
1877 |
// Identically typed arrays can be copied without element-wise checks. |
|
1878 |
assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass); |
|
1879 |
arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); |
|
1880 |
||
1881 |
__ BIND(L_plain_copy); |
|
1066 | 1882 |
__ movl2ptr(count, LENGTH); // elements count |
1883 |
__ movl2ptr(src_pos, SRC_POS); // reload src_pos |
|
1884 |
__ lea(from, Address(src, src_pos, Address::times_ptr, |
|
1885 |
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr |
|
1886 |
__ movl2ptr(dst_pos, DST_POS); // reload dst_pos |
|
1887 |
__ lea(to, Address(dst, dst_pos, Address::times_ptr, |
|
1888 |
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr |
|
1889 |
__ movptr(FROM, from); // src_addr |
|
1890 |
__ movptr(TO, to); // dst_addr |
|
1 | 1891 |
__ movl(COUNT, count); // count |
1892 |
__ jump(RuntimeAddress(entry_oop_arraycopy)); |
|
1893 |
||
1894 |
__ BIND(L_checkcast_copy); |
|
1895 |
// live at this point: rcx_src_klass, dst[_pos], src[_pos] |
|
1896 |
{ |
|
1897 |
// Handy offsets: |
|
13952
e3cf184080bc
8000213: NPG: Should have renamed arrayKlass and typeArrayKlass
coleenp
parents:
13728
diff
changeset
|
1898 |
int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); |
11430
718fc06da49a
7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
stefank
parents:
10565
diff
changeset
|
1899 |
int sco_offset = in_bytes(Klass::super_check_offset_offset()); |
1 | 1900 |
|
1901 |
Register rsi_dst_klass = rsi; |
|
1902 |
Register rdi_temp = rdi; |
|
1903 |
assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos"); |
|
1904 |
assert(rdi_temp == dst_pos, "expected alias w/ dst_pos"); |
|
1905 |
Address dst_klass_lh_addr(rsi_dst_klass, lh_offset); |
|
1906 |
||
1907 |
// Before looking at dst.length, make sure dst is also an objArray. |
|
1066 | 1908 |
__ movptr(rsi_dst_klass, dst_klass_addr); |
1 | 1909 |
__ cmpl(dst_klass_lh_addr, objArray_lh); |
1910 |
__ jccb(Assembler::notEqual, L_failed); |
|
1911 |
||
1912 |
// It is safe to examine both src.length and dst.length. |
|
1066 | 1913 |
__ movl2ptr(src_pos, SRC_POS); // reload rsi |
1 | 1914 |
arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); |
1915 |
// (Now src_pos and dst_pos are killed, but not src and dst.) |
|
1916 |
||
1917 |
// We'll need this temp (don't forget to pop it after the type check). |
|
1066 | 1918 |
__ push(rbx); |
1 | 1919 |
Register rbx_src_klass = rbx; |
1920 |
||
1066 | 1921 |
__ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx |
1922 |
__ movptr(rsi_dst_klass, dst_klass_addr); |
|
1 | 1923 |
Address super_check_offset_addr(rsi_dst_klass, sco_offset); |
1924 |
Label L_fail_array_check; |
|
1925 |
generate_type_check(rbx_src_klass, |
|
1926 |
super_check_offset_addr, dst_klass_addr, |
|
1927 |
rdi_temp, NULL, &L_fail_array_check); |
|
1928 |
// (On fall-through, we have passed the array type check.) |
|
1066 | 1929 |
__ pop(rbx); |
1 | 1930 |
__ jmp(L_plain_copy); |
1931 |
||
1932 |
__ BIND(L_fail_array_check); |
|
1933 |
// Reshuffle arguments so we can call checkcast_arraycopy: |
|
1934 |
||
1935 |
// match initial saves for checkcast_arraycopy |
|
1066 | 1936 |
// push(rsi); // already done; see above |
1937 |
// push(rdi); // already done; see above |
|
1938 |
// push(rbx); // already done; see above |
|
1 | 1939 |
|
1940 |
// Marshal outgoing arguments now, freeing registers. |
|
1941 |
Address from_arg(rsp, 16+ 4); // from |
|
1942 |
Address to_arg(rsp, 16+ 8); // to |
|
1943 |
Address length_arg(rsp, 16+12); // elements count |
|
1944 |
Address ckoff_arg(rsp, 16+16); // super_check_offset |
|
1945 |
Address ckval_arg(rsp, 16+20); // super_klass |
|
1946 |
||
1947 |
Address SRC_POS_arg(rsp, 16+ 8); |
|
1948 |
Address DST_POS_arg(rsp, 16+16); |
|
1949 |
Address LENGTH_arg(rsp, 16+20); |
|
1950 |
// push rbx, changed the incoming offsets (why not just use rbp,??) |
|
1951 |
// assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, ""); |
|
1952 |
||
1066 | 1953 |
__ movptr(rbx, Address(rsi_dst_klass, ek_offset)); |
1954 |
__ movl2ptr(length, LENGTH_arg); // reload elements count |
|
1955 |
__ movl2ptr(src_pos, SRC_POS_arg); // reload src_pos |
|
1956 |
__ movl2ptr(dst_pos, DST_POS_arg); // reload dst_pos |
|
1 | 1957 |
|
1066 | 1958 |
__ movptr(ckval_arg, rbx); // destination element type |
1 | 1959 |
__ movl(rbx, Address(rbx, sco_offset)); |
1960 |
__ movl(ckoff_arg, rbx); // corresponding class check offset |
|
1961 |
||
1962 |
__ movl(length_arg, length); // outgoing length argument |
|
1963 |
||
1066 | 1964 |
__ lea(from, Address(src, src_pos, Address::times_ptr, |
1 | 1965 |
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); |
1066 | 1966 |
__ movptr(from_arg, from); |
1 | 1967 |
|
1066 | 1968 |
__ lea(to, Address(dst, dst_pos, Address::times_ptr, |
1 | 1969 |
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); |
1066 | 1970 |
__ movptr(to_arg, to); |
1 | 1971 |
__ jump(RuntimeAddress(entry_checkcast_arraycopy)); |
1972 |
} |
|
1973 |
||
1974 |
return start; |
|
1975 |
} |
|
1976 |
||
1977 |
void generate_arraycopy_stubs() { |
|
1978 |
address entry; |
|
1979 |
address entry_jbyte_arraycopy; |
|
1980 |
address entry_jshort_arraycopy; |
|
1981 |
address entry_jint_arraycopy; |
|
1982 |
address entry_oop_arraycopy; |
|
1983 |
address entry_jlong_arraycopy; |
|
1984 |
address entry_checkcast_arraycopy; |
|
1985 |
||
1986 |
StubRoutines::_arrayof_jbyte_disjoint_arraycopy = |
|
1987 |
generate_disjoint_copy(T_BYTE, true, Address::times_1, &entry, |
|
1988 |
"arrayof_jbyte_disjoint_arraycopy"); |
|
1989 |
StubRoutines::_arrayof_jbyte_arraycopy = |
|
1990 |
generate_conjoint_copy(T_BYTE, true, Address::times_1, entry, |
|
1991 |
NULL, "arrayof_jbyte_arraycopy"); |
|
1992 |
StubRoutines::_jbyte_disjoint_arraycopy = |
|
1993 |
generate_disjoint_copy(T_BYTE, false, Address::times_1, &entry, |
|
1994 |
"jbyte_disjoint_arraycopy"); |
|
1995 |
StubRoutines::_jbyte_arraycopy = |
|
1996 |
generate_conjoint_copy(T_BYTE, false, Address::times_1, entry, |
|
1997 |
&entry_jbyte_arraycopy, "jbyte_arraycopy"); |
|
1998 |
||
1999 |
StubRoutines::_arrayof_jshort_disjoint_arraycopy = |
|
2000 |
generate_disjoint_copy(T_SHORT, true, Address::times_2, &entry, |
|
2001 |
"arrayof_jshort_disjoint_arraycopy"); |
|
2002 |
StubRoutines::_arrayof_jshort_arraycopy = |
|
2003 |
generate_conjoint_copy(T_SHORT, true, Address::times_2, entry, |
|
2004 |
NULL, "arrayof_jshort_arraycopy"); |
|
2005 |
StubRoutines::_jshort_disjoint_arraycopy = |
|
2006 |
generate_disjoint_copy(T_SHORT, false, Address::times_2, &entry, |
|
2007 |
"jshort_disjoint_arraycopy"); |
|
2008 |
StubRoutines::_jshort_arraycopy = |
|
2009 |
generate_conjoint_copy(T_SHORT, false, Address::times_2, entry, |
|
2010 |
&entry_jshort_arraycopy, "jshort_arraycopy"); |
|
2011 |
||
2012 |
// Next arrays are always aligned on 4 bytes at least. |
|
2013 |
StubRoutines::_jint_disjoint_arraycopy = |
|
2014 |
generate_disjoint_copy(T_INT, true, Address::times_4, &entry, |
|
2015 |
"jint_disjoint_arraycopy"); |
|
2016 |
StubRoutines::_jint_arraycopy = |
|
2017 |
generate_conjoint_copy(T_INT, true, Address::times_4, entry, |
|
2018 |
&entry_jint_arraycopy, "jint_arraycopy"); |
|
2019 |
||
2020 |
StubRoutines::_oop_disjoint_arraycopy = |
|
1066 | 2021 |
generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry, |
1 | 2022 |
"oop_disjoint_arraycopy"); |
2023 |
StubRoutines::_oop_arraycopy = |
|
1066 | 2024 |
generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry, |
1 | 2025 |
&entry_oop_arraycopy, "oop_arraycopy"); |
2026 |
||
8498 | 2027 |
StubRoutines::_oop_disjoint_arraycopy_uninit = |
2028 |
generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry, |
|
2029 |
"oop_disjoint_arraycopy_uninit", |
|
2030 |
/*dest_uninitialized*/true); |
|
2031 |
StubRoutines::_oop_arraycopy_uninit = |
|
2032 |
generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry, |
|
2033 |
NULL, "oop_arraycopy_uninit", |
|
2034 |
/*dest_uninitialized*/true); |
|
2035 |
||
1 | 2036 |
StubRoutines::_jlong_disjoint_arraycopy = |
2037 |
generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy"); |
|
2038 |
StubRoutines::_jlong_arraycopy = |
|
2039 |
generate_conjoint_long_copy(entry, &entry_jlong_arraycopy, |
|
2040 |
"jlong_arraycopy"); |
|
2041 |
||
6433 | 2042 |
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); |
2043 |
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); |
|
2044 |
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); |
|
2045 |
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); |
|
2046 |
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); |
|
2047 |
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); |
|
2048 |
||
8498 | 2049 |
StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; |
2050 |
StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; |
|
2051 |
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; |
|
2052 |
StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; |
|
1 | 2053 |
|
8498 | 2054 |
StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; |
2055 |
StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; |
|
2056 |
StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; |
|
2057 |
StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; |
|
1 | 2058 |
|
2059 |
StubRoutines::_checkcast_arraycopy = |
|
8498 | 2060 |
generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); |
2061 |
StubRoutines::_checkcast_arraycopy_uninit = |
|
2062 |
generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true); |
|
1 | 2063 |
|
2064 |
StubRoutines::_unsafe_arraycopy = |
|
2065 |
generate_unsafe_copy("unsafe_arraycopy", |
|
2066 |
entry_jbyte_arraycopy, |
|
2067 |
entry_jshort_arraycopy, |
|
2068 |
entry_jint_arraycopy, |
|
2069 |
entry_jlong_arraycopy); |
|
2070 |
||
2071 |
StubRoutines::_generic_arraycopy = |
|
2072 |
generate_generic_copy("generic_arraycopy", |
|
2073 |
entry_jbyte_arraycopy, |
|
2074 |
entry_jshort_arraycopy, |
|
2075 |
entry_jint_arraycopy, |
|
2076 |
entry_oop_arraycopy, |
|
2077 |
entry_jlong_arraycopy, |
|
2078 |
entry_checkcast_arraycopy); |
|
2079 |
} |
|
2080 |
||
4645
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2081 |
void generate_math_stubs() { |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2082 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2083 |
StubCodeMark mark(this, "StubRoutines", "log"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2084 |
StubRoutines::_intrinsic_log = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2085 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2086 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2087 |
__ flog(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2088 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2089 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2090 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2091 |
StubCodeMark mark(this, "StubRoutines", "log10"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2092 |
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2093 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2094 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2095 |
__ flog10(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2096 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2097 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2098 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2099 |
StubCodeMark mark(this, "StubRoutines", "sin"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2100 |
StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2101 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2102 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2103 |
__ trigfunc('s'); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2104 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2105 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2106 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2107 |
StubCodeMark mark(this, "StubRoutines", "cos"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2108 |
StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2109 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2110 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2111 |
__ trigfunc('c'); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2112 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2113 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2114 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2115 |
StubCodeMark mark(this, "StubRoutines", "tan"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2116 |
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2117 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2118 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2119 |
__ trigfunc('t'); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2120 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2121 |
} |
12739
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2122 |
{ |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2123 |
StubCodeMark mark(this, "StubRoutines", "exp"); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2124 |
StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc(); |
4645
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2125 |
|
12739
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2126 |
__ fld_d(Address(rsp, 4)); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2127 |
__ exp_with_fallback(0); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2128 |
__ ret(0); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2129 |
} |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2130 |
{ |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2131 |
StubCodeMark mark(this, "StubRoutines", "pow"); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2132 |
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc(); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2133 |
|
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2134 |
__ fld_d(Address(rsp, 12)); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2135 |
__ fld_d(Address(rsp, 4)); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2136 |
__ pow_with_fallback(0); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2137 |
__ ret(0); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2138 |
} |
4645
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2139 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2140 |
|
14132 | 2141 |
// AES intrinsic stubs

// Number of bytes processed per AES block; the chaining loops advance their
// position and decrement the remaining length by this amount.
enum { AESBlockSize = 16 };
|
2143 |
||
2144 |
// Emits the 16-byte constant used as the pshufb mask when loading key words
// and returns the address at which it starts. The data is aligned to 16
// bytes and written as four 32-bit words.
address generate_key_shuffle_mask() {
  static const int mask_words[] = {
    0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
  };
  const int word_count = (int)(sizeof(mask_words) / sizeof(mask_words[0]));

  __ align(16);
  StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
  address start = __ pc();
  for (int i = 0; i < word_count; i++) {
    __ emit_data(mask_words[i], relocInfo::none, 0);
  }
  return start;
}
|
2154 |
||
2155 |
// Utility routine for loading a 128-bit key word in little endian format |
|
2156 |
// can optionally specify that the shuffle mask is already in an xmmregister |
|
2157 |
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { |
|
2158 |
__ movdqu(xmmdst, Address(key, offset)); |
|
2159 |
if (xmm_shuf_mask != NULL) { |
|
2160 |
__ pshufb(xmmdst, xmm_shuf_mask); |
|
2161 |
} else { |
|
2162 |
__ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2163 |
} |
|
2164 |
} |
|
2165 |
||
2166 |
// aesenc using specified key+offset |
|
2167 |
// can optionally specify that the shuffle mask is already in an xmmregister |
|
2168 |
void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { |
|
2169 |
load_key(xmmtmp, key, offset, xmm_shuf_mask); |
|
2170 |
__ aesenc(xmmdst, xmmtmp); |
|
2171 |
} |
|
2172 |
||
2173 |
// aesdec using specified key+offset |
|
2174 |
// can optionally specify that the shuffle mask is already in an xmmregister |
|
2175 |
void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { |
|
2176 |
load_key(xmmtmp, key, offset, xmm_shuf_mask); |
|
2177 |
__ aesdec(xmmdst, xmmtmp); |
|
2178 |
} |
|
2179 |
||
2180 |
||
2181 |
// Arguments: |
|
2182 |
// |
|
2183 |
// Inputs: |
|
2184 |
// c_rarg0 - source byte array address |
|
2185 |
// c_rarg1 - destination byte array address |
|
2186 |
// c_rarg2 - K (key) in little endian int array |
|
2187 |
// |
|
2188 |
address generate_aescrypt_encryptBlock() { |
|
14834 | 2189 |
assert(UseAES, "need AES instructions and misaligned SSE support"); |
14132 | 2190 |
__ align(CodeEntryAlignment); |
2191 |
StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); |
|
2192 |
Label L_doLast; |
|
2193 |
address start = __ pc(); |
|
2194 |
||
14834 | 2195 |
const Register from = rdx; // source array address |
14132 | 2196 |
const Register to = rdx; // destination array address |
2197 |
const Register key = rcx; // key array address |
|
2198 |
const Register keylen = rax; |
|
2199 |
const Address from_param(rbp, 8+0); |
|
2200 |
const Address to_param (rbp, 8+4); |
|
2201 |
const Address key_param (rbp, 8+8); |
|
2202 |
||
2203 |
const XMMRegister xmm_result = xmm0; |
|
14834 | 2204 |
const XMMRegister xmm_key_shuf_mask = xmm1; |
2205 |
const XMMRegister xmm_temp1 = xmm2; |
|
2206 |
const XMMRegister xmm_temp2 = xmm3; |
|
2207 |
const XMMRegister xmm_temp3 = xmm4; |
|
2208 |
const XMMRegister xmm_temp4 = xmm5; |
|
2209 |
||
2210 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
2211 |
__ movptr(from, from_param); |
|
2212 |
__ movptr(key, key_param); |
|
2213 |
||
2214 |
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60} |
|
14132 | 2215 |
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2216 |
||
2217 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2218 |
__ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input |
|
14834 | 2219 |
__ movptr(to, to_param); |
14132 | 2220 |
|
2221 |
// For encryption, the java expanded key ordering is just what we need |
|
2222 |
||
14834 | 2223 |
load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); |
2224 |
__ pxor(xmm_result, xmm_temp1); |
|
2225 |
||
2226 |
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); |
|
2227 |
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); |
|
2228 |
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); |
|
2229 |
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); |
|
2230 |
||
2231 |
__ aesenc(xmm_result, xmm_temp1); |
|
2232 |
__ aesenc(xmm_result, xmm_temp2); |
|
2233 |
__ aesenc(xmm_result, xmm_temp3); |
|
2234 |
__ aesenc(xmm_result, xmm_temp4); |
|
2235 |
||
2236 |
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); |
|
2237 |
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); |
|
2238 |
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); |
|
2239 |
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); |
|
2240 |
||
2241 |
__ aesenc(xmm_result, xmm_temp1); |
|
2242 |
__ aesenc(xmm_result, xmm_temp2); |
|
2243 |
__ aesenc(xmm_result, xmm_temp3); |
|
2244 |
__ aesenc(xmm_result, xmm_temp4); |
|
2245 |
||
2246 |
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); |
|
2247 |
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); |
|
2248 |
||
2249 |
__ cmpl(keylen, 44); |
|
2250 |
__ jccb(Assembler::equal, L_doLast); |
|
2251 |
||
2252 |
__ aesenc(xmm_result, xmm_temp1); |
|
2253 |
__ aesenc(xmm_result, xmm_temp2); |
|
2254 |
||
2255 |
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); |
|
2256 |
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); |
|
2257 |
||
2258 |
__ cmpl(keylen, 52); |
|
2259 |
__ jccb(Assembler::equal, L_doLast); |
|
2260 |
||
2261 |
__ aesenc(xmm_result, xmm_temp1); |
|
2262 |
__ aesenc(xmm_result, xmm_temp2); |
|
2263 |
||
2264 |
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); |
|
2265 |
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); |
|
14132 | 2266 |
|
2267 |
__ BIND(L_doLast); |
|
14834 | 2268 |
__ aesenc(xmm_result, xmm_temp1); |
2269 |
__ aesenclast(xmm_result, xmm_temp2); |
|
14132 | 2270 |
__ movdqu(Address(to, 0), xmm_result); // store the result |
2271 |
__ xorptr(rax, rax); // return 0 |
|
2272 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
|
2273 |
__ ret(0); |
|
2274 |
||
2275 |
return start; |
|
2276 |
} |
|
2277 |
||
2278 |
||
2279 |
// Arguments: |
|
2280 |
// |
|
2281 |
// Inputs: |
|
2282 |
// c_rarg0 - source byte array address |
|
2283 |
// c_rarg1 - destination byte array address |
|
2284 |
// c_rarg2 - K (key) in little endian int array |
|
2285 |
// |
|
2286 |
address generate_aescrypt_decryptBlock() { |
|
14834 | 2287 |
assert(UseAES, "need AES instructions and misaligned SSE support"); |
14132 | 2288 |
__ align(CodeEntryAlignment); |
2289 |
StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); |
|
2290 |
Label L_doLast; |
|
2291 |
address start = __ pc(); |
|
2292 |
||
14834 | 2293 |
const Register from = rdx; // source array address |
14132 | 2294 |
const Register to = rdx; // destination array address |
2295 |
const Register key = rcx; // key array address |
|
2296 |
const Register keylen = rax; |
|
2297 |
const Address from_param(rbp, 8+0); |
|
2298 |
const Address to_param (rbp, 8+4); |
|
2299 |
const Address key_param (rbp, 8+8); |
|
2300 |
||
2301 |
const XMMRegister xmm_result = xmm0; |
|
14834 | 2302 |
const XMMRegister xmm_key_shuf_mask = xmm1; |
2303 |
const XMMRegister xmm_temp1 = xmm2; |
|
2304 |
const XMMRegister xmm_temp2 = xmm3; |
|
2305 |
const XMMRegister xmm_temp3 = xmm4; |
|
2306 |
const XMMRegister xmm_temp4 = xmm5; |
|
14132 | 2307 |
|
2308 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
14834 | 2309 |
__ movptr(from, from_param); |
2310 |
__ movptr(key, key_param); |
|
2311 |
||
2312 |
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60} |
|
14132 | 2313 |
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2314 |
||
2315 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2316 |
__ movdqu(xmm_result, Address(from, 0)); |
|
14834 | 2317 |
__ movptr(to, to_param); |
14132 | 2318 |
|
2319 |
// for decryption java expanded key ordering is rotated one position from what we want |
|
2320 |
// so we start from 0x10 here and hit 0x00 last |
|
2321 |
// we don't know if the key is aligned, hence not using load-execute form |
|
14834 | 2322 |
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); |
2323 |
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); |
|
2324 |
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); |
|
2325 |
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); |
|
2326 |
||
2327 |
__ pxor (xmm_result, xmm_temp1); |
|
2328 |
__ aesdec(xmm_result, xmm_temp2); |
|
2329 |
__ aesdec(xmm_result, xmm_temp3); |
|
2330 |
__ aesdec(xmm_result, xmm_temp4); |
|
2331 |
||
2332 |
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); |
|
2333 |
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); |
|
2334 |
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); |
|
2335 |
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); |
|
2336 |
||
2337 |
__ aesdec(xmm_result, xmm_temp1); |
|
2338 |
__ aesdec(xmm_result, xmm_temp2); |
|
2339 |
__ aesdec(xmm_result, xmm_temp3); |
|
2340 |
__ aesdec(xmm_result, xmm_temp4); |
|
2341 |
||
2342 |
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); |
|
2343 |
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); |
|
2344 |
load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); |
|
2345 |
||
2346 |
__ cmpl(keylen, 44); |
|
2347 |
__ jccb(Assembler::equal, L_doLast); |
|
2348 |
||
2349 |
__ aesdec(xmm_result, xmm_temp1); |
|
2350 |
__ aesdec(xmm_result, xmm_temp2); |
|
2351 |
||
2352 |
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); |
|
2353 |
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); |
|
2354 |
||
2355 |
__ cmpl(keylen, 52); |
|
2356 |
__ jccb(Assembler::equal, L_doLast); |
|
2357 |
||
2358 |
__ aesdec(xmm_result, xmm_temp1); |
|
2359 |
__ aesdec(xmm_result, xmm_temp2); |
|
2360 |
||
2361 |
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); |
|
2362 |
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); |
|
14132 | 2363 |
|
2364 |
__ BIND(L_doLast); |
|
14834 | 2365 |
__ aesdec(xmm_result, xmm_temp1); |
2366 |
__ aesdec(xmm_result, xmm_temp2); |
|
2367 |
||
14132 | 2368 |
// for decryption the aesdeclast operation is always on key+0x00 |
14834 | 2369 |
__ aesdeclast(xmm_result, xmm_temp3); |
14132 | 2370 |
__ movdqu(Address(to, 0), xmm_result); // store the result |
2371 |
__ xorptr(rax, rax); // return 0 |
|
2372 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
|
2373 |
__ ret(0); |
|
2374 |
||
2375 |
return start; |
|
2376 |
} |
|
2377 |
||
2378 |
void handleSOERegisters(bool saving) { |
|
2379 |
const int saveFrameSizeInBytes = 4 * wordSize; |
|
2380 |
const Address saved_rbx (rbp, -3 * wordSize); |
|
2381 |
const Address saved_rsi (rbp, -2 * wordSize); |
|
2382 |
const Address saved_rdi (rbp, -1 * wordSize); |
|
2383 |
||
2384 |
if (saving) { |
|
2385 |
__ subptr(rsp, saveFrameSizeInBytes); |
|
2386 |
__ movptr(saved_rsi, rsi); |
|
2387 |
__ movptr(saved_rdi, rdi); |
|
2388 |
__ movptr(saved_rbx, rbx); |
|
2389 |
} else { |
|
2390 |
// restoring |
|
2391 |
__ movptr(rsi, saved_rsi); |
|
2392 |
__ movptr(rdi, saved_rdi); |
|
2393 |
__ movptr(rbx, saved_rbx); |
|
2394 |
} |
|
2395 |
} |
|
2396 |
||
2397 |
// Arguments: |
|
2398 |
// |
|
2399 |
// Inputs: |
|
2400 |
// c_rarg0 - source byte array address |
|
2401 |
// c_rarg1 - destination byte array address |
|
2402 |
// c_rarg2 - K (key) in little endian int array |
|
2403 |
// c_rarg3 - r vector byte array address |
|
2404 |
// c_rarg4 - input length |
|
2405 |
// |
|
22505 | 2406 |
// Output: |
2407 |
// rax - input length |
|
2408 |
// |
|
14132 | 2409 |
address generate_cipherBlockChaining_encryptAESCrypt() { |
14834 | 2410 |
assert(UseAES, "need AES instructions and misaligned SSE support"); |
14132 | 2411 |
__ align(CodeEntryAlignment); |
2412 |
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); |
|
2413 |
address start = __ pc(); |
|
2414 |
||
2415 |
Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; |
|
2416 |
const Register from = rsi; // source array address |
|
2417 |
const Register to = rdx; // destination array address |
|
2418 |
const Register key = rcx; // key array address |
|
2419 |
const Register rvec = rdi; // r byte array initialized from initvector array address |
|
2420 |
// and left with the results of the last encryption block |
|
2421 |
const Register len_reg = rbx; // src len (must be multiple of blocksize 16) |
|
2422 |
const Register pos = rax; |
|
2423 |
||
2424 |
// xmm register assignments for the loops below |
|
2425 |
const XMMRegister xmm_result = xmm0; |
|
2426 |
const XMMRegister xmm_temp = xmm1; |
|
2427 |
// first 6 keys preloaded into xmm2-xmm7 |
|
2428 |
const int XMM_REG_NUM_KEY_FIRST = 2; |
|
2429 |
const int XMM_REG_NUM_KEY_LAST = 7; |
|
2430 |
const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); |
|
2431 |
||
2432 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
2433 |
handleSOERegisters(true /*saving*/); |
|
2434 |
||
2435 |
// load registers from incoming parameters |
|
2436 |
const Address from_param(rbp, 8+0); |
|
2437 |
const Address to_param (rbp, 8+4); |
|
2438 |
const Address key_param (rbp, 8+8); |
|
2439 |
const Address rvec_param (rbp, 8+12); |
|
2440 |
const Address len_param (rbp, 8+16); |
|
2441 |
__ movptr(from , from_param); |
|
2442 |
__ movptr(to , to_param); |
|
2443 |
__ movptr(key , key_param); |
|
2444 |
__ movptr(rvec , rvec_param); |
|
2445 |
__ movptr(len_reg , len_param); |
|
2446 |
||
2447 |
const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front |
|
2448 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2449 |
// load up xmm regs 2 thru 7 with keys 0-5 |
|
2450 |
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2451 |
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); |
|
2452 |
offset += 0x10; |
|
2453 |
} |
|
2454 |
||
2455 |
__ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec |
|
2456 |
||
2457 |
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) |
|
2458 |
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
|
2459 |
__ cmpl(rax, 44); |
|
2460 |
__ jcc(Assembler::notEqual, L_key_192_256); |
|
2461 |
||
2462 |
// 128 bit code follows here |
|
14834 | 2463 |
__ movl(pos, 0); |
14132 | 2464 |
__ align(OptoLoopAlignment); |
2465 |
__ BIND(L_loopTop_128); |
|
2466 |
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
|
2467 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2468 |
||
2469 |
__ pxor (xmm_result, xmm_key0); // do the aes rounds |
|
2470 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2471 |
__ aesenc(xmm_result, as_XMMRegister(rnum)); |
|
2472 |
} |
|
2473 |
for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) { |
|
2474 |
aes_enc_key(xmm_result, xmm_temp, key, key_offset); |
|
2475 |
} |
|
2476 |
load_key(xmm_temp, key, 0xa0); |
|
2477 |
__ aesenclast(xmm_result, xmm_temp); |
|
2478 |
||
2479 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2480 |
// no need to store r to memory until we exit |
|
2481 |
__ addptr(pos, AESBlockSize); |
|
2482 |
__ subptr(len_reg, AESBlockSize); |
|
2483 |
__ jcc(Assembler::notEqual, L_loopTop_128); |
|
2484 |
||
2485 |
__ BIND(L_exit); |
|
2486 |
__ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object |
|
2487 |
||
2488 |
handleSOERegisters(false /*restoring*/); |
|
22505 | 2489 |
__ movptr(rax, len_param); // return length |
14132 | 2490 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
2491 |
__ ret(0); |
|
2492 |
||
14834 | 2493 |
__ BIND(L_key_192_256); |
2494 |
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) |
|
14132 | 2495 |
__ cmpl(rax, 52); |
2496 |
__ jcc(Assembler::notEqual, L_key_256); |
|
2497 |
||
2498 |
// 192-bit code follows here (could be changed to use more xmm registers) |
|
14834 | 2499 |
__ movl(pos, 0); |
2500 |
__ align(OptoLoopAlignment); |
|
2501 |
__ BIND(L_loopTop_192); |
|
14132 | 2502 |
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2503 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2504 |
||
2505 |
__ pxor (xmm_result, xmm_key0); // do the aes rounds |
|
2506 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2507 |
__ aesenc(xmm_result, as_XMMRegister(rnum)); |
|
2508 |
} |
|
2509 |
for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) { |
|
2510 |
aes_enc_key(xmm_result, xmm_temp, key, key_offset); |
|
2511 |
} |
|
2512 |
load_key(xmm_temp, key, 0xc0); |
|
2513 |
__ aesenclast(xmm_result, xmm_temp); |
|
2514 |
||
2515 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2516 |
// no need to store r to memory until we exit |
|
2517 |
__ addptr(pos, AESBlockSize); |
|
2518 |
__ subptr(len_reg, AESBlockSize); |
|
2519 |
__ jcc(Assembler::notEqual, L_loopTop_192); |
|
2520 |
__ jmp(L_exit); |
|
2521 |
||
14834 | 2522 |
__ BIND(L_key_256); |
14132 | 2523 |
// 256-bit code follows here (could be changed to use more xmm registers) |
14834 | 2524 |
__ movl(pos, 0); |
2525 |
__ align(OptoLoopAlignment); |
|
2526 |
__ BIND(L_loopTop_256); |
|
14132 | 2527 |
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2528 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2529 |
||
2530 |
__ pxor (xmm_result, xmm_key0); // do the aes rounds |
|
2531 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2532 |
__ aesenc(xmm_result, as_XMMRegister(rnum)); |
|
2533 |
} |
|
2534 |
for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) { |
|
2535 |
aes_enc_key(xmm_result, xmm_temp, key, key_offset); |
|
2536 |
} |
|
2537 |
load_key(xmm_temp, key, 0xe0); |
|
2538 |
__ aesenclast(xmm_result, xmm_temp); |
|
2539 |
||
2540 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2541 |
// no need to store r to memory until we exit |
|
2542 |
__ addptr(pos, AESBlockSize); |
|
2543 |
__ subptr(len_reg, AESBlockSize); |
|
2544 |
__ jcc(Assembler::notEqual, L_loopTop_256); |
|
2545 |
__ jmp(L_exit); |
|
2546 |
||
2547 |
return start; |
|
2548 |
} |
|
2549 |
||
2550 |
||
2551 |
// CBC AES Decryption. |
|
2552 |
// In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time. |
|
2553 |
// |
|
2554 |
// Arguments: |
|
2555 |
// |
|
2556 |
// Inputs: |
|
2557 |
// c_rarg0 - source byte array address |
|
2558 |
// c_rarg1 - destination byte array address |
|
2559 |
// c_rarg2 - K (key) in little endian int array |
|
2560 |
// c_rarg3 - r vector byte array address |
|
2561 |
// c_rarg4 - input length |
|
2562 |
// |
|
22505 | 2563 |
// Output: |
2564 |
// rax - input length |
|
2565 |
// |
|
14132 | 2566 |
|
2567 |
address generate_cipherBlockChaining_decryptAESCrypt() {
  // Emits the 32-bit CBC AES decryption stub. The stub decrypts one 16-byte
  // block per loop iteration, xors it with the previous cipher block (the r
  // vector for the first block), and finally stores the last cipher block
  // back into rvec. Three loops are emitted, one per expanded-key length
  // (44, 52 or 60 ints), selected at run time.
  //
  // NOTE(review): the previous cipher block is re-read from the source buffer
  // through prev_block_cipher_ptr, so this presumably relies on the
  // destination not overwriting the source (in-place decryption) - confirm
  // that the caller guarantees this.
  assert(UseAES, "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
  address start = __ pc();

  Label L_exit, L_key_192_256, L_key_256;
  Label L_singleBlock_loopTop_128;
  Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
  const Register from        = rsi;      // source array address
  const Register to          = rdx;      // destination array address
  const Register key         = rcx;      // key array address
  const Register rvec        = rdi;      // r byte array initialized from initvector array address
                                         // and left with the results of the last encryption block
  const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
  const Register pos         = rax;      // byte offset of the current block within from/to

  // xmm register assignments for the loops below
  const XMMRegister xmm_result = xmm0;
  const XMMRegister xmm_temp   = xmm1;
  // first 6 keys preloaded into xmm2-xmm7
  const int XMM_REG_NUM_KEY_FIRST = 2;
  const int XMM_REG_NUM_KEY_LAST  = 7;
  const int FIRST_NON_REG_KEY_offset = 0x70;  // first key offset that is not held in a register
  const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  handleSOERegisters(true /*saving*/);

  // load registers from incoming parameters
  const Address  from_param(rbp, 8+0);
  const Address  to_param  (rbp, 8+4);
  const Address  key_param (rbp, 8+8);
  const Address  rvec_param (rbp, 8+12);
  const Address  len_param  (rbp, 8+16);
  __ movptr(from , from_param);
  __ movptr(to   , to_param);
  __ movptr(key  , key_param);
  __ movptr(rvec , rvec_param);
  __ movptr(len_reg , len_param);

  // the java expanded key ordering is rotated one position from what we want
  // so we start from 0x10 here and hit 0x00 last
  const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  // load up xmm regs 2 thru 7 with the first 6 keys (key offsets 0x10 .. 0x60)
  for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
    offset += 0x10;
  }

  // inside here, use the rvec register to point to previous block cipher
  // with which we xor at the end of each newly decrypted block
  const Register prev_block_cipher_ptr = rvec;

  // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
  __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
  __ cmpl(rax, 44);
  __ jcc(Assembler::notEqual, L_key_192_256);


  // 128-bit code follows here (single block per iteration, not parallelized)
  __ movl(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_singleBlock_loopTop_128);
  __ cmpptr(len_reg, 0);           // any blocks left??
  __ jcc(Assembler::equal, L_exit);
  __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
  __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesdec(xmm_result, as_XMMRegister(rnum));
  }
  for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) {   // 128-bit runs up to key offset a0
    aes_dec_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
  __ aesdeclast(xmm_result, xmm_temp);
  __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
  __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jmp(L_singleBlock_loopTop_128);


  __ BIND(L_exit);
  __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
  __ movptr(rvec , rvec_param);                                     // restore this since used in loop
  __ movdqu(Address(rvec, 0), xmm_temp);                            // final value of r stored in rvec of CipherBlockChaining object
  handleSOERegisters(false /*restoring*/);
  __ movptr(rax, len_param); // return length
  __ leave();                                                       // required for proper stackwalking of RuntimeStub frame
  __ ret(0);


  __ BIND(L_key_192_256);
  // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
  __ cmpl(rax, 52);
  __ jcc(Assembler::notEqual, L_key_256);

  // 192-bit code follows here (could be optimized to use parallelism)
  __ movl(pos, 0);
  // The loop below tests len_reg only after processing a block, so bail out
  // up front when there is nothing to do (the 128-bit loop performs the
  // equivalent test at its loop top).
  __ cmpptr(len_reg, 0);
  __ jcc(Assembler::equal, L_exit);
  __ align(OptoLoopAlignment);
  __ BIND(L_singleBlock_loopTop_192);
  __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
  __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesdec(xmm_result, as_XMMRegister(rnum));
  }
  for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) {   // 192-bit runs up to key offset c0
    aes_dec_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
  __ aesdeclast(xmm_result, xmm_temp);
  __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
  __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
  __ jmp(L_exit);

  __ BIND(L_key_256);
  // 256-bit code follows here (could be optimized to use parallelism)
  __ movl(pos, 0);
  // Same zero-length guard as for the 192-bit loop above.
  __ cmpptr(len_reg, 0);
  __ jcc(Assembler::equal, L_exit);
  __ align(OptoLoopAlignment);
  __ BIND(L_singleBlock_loopTop_256);
  __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
  __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesdec(xmm_result, as_XMMRegister(rnum));
  }
  for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
    aes_dec_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
  __ aesdeclast(xmm_result, xmm_temp);
  __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
  __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
  __ jmp(L_exit);

  return start;
}
|
2720 |
||
18507
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2721 |
/** |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2722 |
* Arguments: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2723 |
* |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2724 |
* Inputs: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2725 |
* rsp(4) - int crc |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2726 |
* rsp(8) - byte* buf |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2727 |
* rsp(12) - int length |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2728 |
* |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2729 |
* Output: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2730 |
* rax - int crc result |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2731 |
*/ |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2732 |
address generate_updateBytesCRC32() {
  // Emits the updateBytesCRC32 stub: it loads the three stack arguments
  // (crc, buf, length), runs the kernel_crc32 macro-assembler routine on
  // them, and returns the resulting crc value in rax.
  assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");

  address start = __ pc();

  // Register roles handed to kernel_crc32.
  const Register crc_reg   = rdx;  // running crc value
  const Register buf_reg   = rsi;  // source java byte array address
  const Register len_reg   = rcx;  // number of bytes
  const Register table_reg = rdi;  // crc_table address (reuse register)
  const Register tmp_reg   = rbx;  // scratch
  // rax receives the result, so it must not alias any of the above.
  assert_different_registers(crc_reg, buf_reg, len_reg, table_reg, tmp_reg, rax);

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // Preserve the three registers that are popped again before returning.
  __ push(rsi);
  __ push(rdi);
  __ push(rbx);

  // Incoming arguments, addressed relative to the frame pointer set up by enter().
  Address crc_slot(rbp, 8 + 0);
  Address buf_slot(rbp, 8 + 4);
  Address len_slot(rbp, 8 + 8);

  // Pull the arguments into their working registers.
  __ movl(crc_reg, crc_slot);
  __ movptr(buf_reg, buf_slot);
  __ movl(len_reg, len_slot);

  __ kernel_crc32(crc_reg, buf_reg, len_reg, table_reg, tmp_reg);

  // Hand the crc back in rax, then unwind in reverse push order.
  __ movl(rax, crc_reg);
  __ pop(rbx);
  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2773 |
|
18740 | 2774 |
// Safefetch stubs. |
2775 |
void generate_safefetch(const char* name, int size, address* entry, |
|
2776 |
address* fault_pc, address* continuation_pc) { |
|
2777 |
// safefetch signatures: |
|
2778 |
// int SafeFetch32(int* adr, int errValue); |
|
2779 |
// intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); |
|
2780 |
||
2781 |
StubCodeMark mark(this, "StubRoutines", name); |
|
2782 |
||
2783 |
// Entry point, pc or function descriptor. |
|
2784 |
*entry = __ pc(); |
|
2785 |
||
2786 |
__ movl(rax, Address(rsp, 0x8)); |
|
2787 |
__ movl(rcx, Address(rsp, 0x4)); |
|
2788 |
// Load *adr into eax, may fault. |
|
2789 |
*fault_pc = __ pc(); |
|
2790 |
switch (size) { |
|
2791 |
case 4: |
|
2792 |
// int32_t |
|
2793 |
__ movl(rax, Address(rcx, 0)); |
|
2794 |
break; |
|
2795 |
case 8: |
|
2796 |
// int64_t |
|
2797 |
Unimplemented(); |
|
2798 |
break; |
|
2799 |
default: |
|
2800 |
ShouldNotReachHere(); |
|
2801 |
} |
|
2802 |
||
2803 |
// Return errValue or *adr. |
|
2804 |
*continuation_pc = __ pc(); |
|
2805 |
__ ret(0); |
|
2806 |
} |
|
14132 | 2807 |
|
1 | 2808 |
public: |
2809 |
// Information about frame layout at time of blocking runtime call. |
|
2810 |
// Note that we only have to preserve callee-saved registers since |
|
2811 |
// the compilers are responsible for supplying a continuation point |
|
2812 |
// if they expect all registers to be preserved. |
|
2813 |
// Word offsets (from rsp) of the slots in the frame built by
// generate_throw_exception; framesize is the total slot count.
enum layout {
  thread_off = 0,  // last_java_sp; slot that receives the java thread argument
  arg1_off   = 1,  // optional first extra argument
  arg2_off   = 2,  // optional second extra argument
  rbp_off    = 3,  // callee saved register
  ret_pc     = 4,  // return address
  framesize  = 5   // number of slots above
};
|
2821 |
||
2822 |
private: |
|
2823 |
||
2824 |
#undef __ |
|
2825 |
#define __ masm-> |
|
2826 |
||
2827 |
//------------------------------------------------------------------------------------------------------------------------ |
|
2828 |
// Continuation point for throwing of implicit exceptions that are not handled in |
|
2829 |
// the current activation. Fabricates an exception oop and initiates normal |
|
2830 |
// exception dispatching in this frame. |
|
2831 |
// |
|
2832 |
// Previously the compiler (c2) allowed for callee save registers on Java calls. |
|
2833 |
// This is no longer true after adapter frames were removed but could possibly |
|
2834 |
// be brought back in the future if the interpreter code was reworked and it |
|
2835 |
// was deemed worthwhile. The comment below was left to describe what must |
|
2836 |
// happen here if callee saves were resurrected. As it stands now this stub |
|
2837 |
// could actually be a vanilla BufferBlob and have no oopMap at all. |
|
2838 |
// Since it doesn't make much difference we've chosen to leave it the |
|
2839 |
// way it was in the callee save days and keep the comment. |
|
2840 |
||
2841 |
// If we need to preserve callee-saved values we need a callee-saved oop map and |
|
2842 |
// therefore have to make these stubs into RuntimeStubs rather than BufferBlobs. |
|
2843 |
// If the compiler needs all registers to be preserved between the fault |
|
2844 |
// point and the exception handler then it must assume responsibility for that in |
|
2845 |
// AbstractCompiler::continuation_for_implicit_null_exception or |
|
2846 |
// continuation_for_implicit_division_by_zero_exception. All other implicit |
|
2847 |
// exceptions (e.g., NullPointerException or AbstractMethodError on entry) are |
|
2848 |
// either at call sites or otherwise assume that stack unwinding will be initiated, |
|
2849 |
// so caller saved registers were assumed volatile in the compiler. |
|
2850 |
address generate_throw_exception(const char* name, address runtime_entry,
                                 Register arg1 = noreg, Register arg2 = noreg) {
  // Builds a standalone RuntimeStub that calls runtime_entry with the current
  // thread (plus up to two optional register arguments) and then jumps to the
  // forward-exception stub. Returns the entry point of the new stub.

  const int code_bytes  = 256;
  const int reloc_bytes = 32;

  CodeBuffer code(name, code_bytes, reloc_bytes);
  OopMapSet* oop_maps  = new OopMapSet();
  MacroAssembler* masm = new MacroAssembler(&code);

  address start = __ pc();

  // This is an inlined and slightly modified version of call_VM
  // which has the ability to fetch the return PC out of
  // thread-local storage and also sets up last_Java_sp slightly
  // differently than the real call_VM
  Register thread_reg = rbx;
  __ get_thread(thread_reg);

  __ enter(); // required for proper stackwalking of RuntimeStub frame

  // pc and rbp are already on the stack; reserve the remaining slots.
  __ subptr(rsp, (framesize-2) * wordSize); // prolog

  // Frame is now completed as far as size and linkage.
  int frame_complete = __ pc() - start;

  // Store the java thread (becomes first argument of C function), followed
  // by whichever optional arguments were supplied.
  __ movptr(Address(rsp, thread_off * wordSize), thread_reg);
  if (arg1 != noreg) {
    __ movptr(Address(rsp, arg1_off * wordSize), arg1);
  }
  if (arg2 != noreg) {
    assert(arg1 != noreg, "missing reg arg");
    __ movptr(Address(rsp, arg2_off * wordSize), arg2);
  }

  // Set up last_Java_sp and last_Java_fp
  __ set_last_Java_frame(thread_reg, rsp, rbp, NULL);

  // Call runtime
  BLOCK_COMMENT("call runtime_entry");
  __ call(RuntimeAddress(runtime_entry));
  // Record an oop map at the return address of the call.
  OopMap* call_site_map = new OopMap(framesize, 0);
  oop_maps->add_gc_map(__ pc() - start, call_site_map);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  __ get_thread(thread_reg);

  __ reset_last_Java_frame(thread_reg, true, false);

  __ leave(); // required for proper stackwalking of RuntimeStub frame

  // check for pending exceptions
#ifdef ASSERT
  Label L;
  __ cmpptr(Address(thread_reg, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
  __ jcc(Assembler::notEqual, L);
  __ should_not_reach_here();
  __ bind(L);
#endif /* ASSERT */
  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));


  RuntimeStub* throw_stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false);
  return throw_stub->entry_point();
}
|
2921 |
||
2922 |
||
2923 |
void create_control_words() {
  // Fills in the StubRoutines constants for the x87 control word variants,
  // the standard MXCSR value, and the two 80-bit subnormal bias values.

  // x87: round to nearest, 53-bit precision, exceptions masked
  StubRoutines::_fpu_cntrl_wrd_std   = 0x027F;
  // x87: round to zero, exceptions masked
  // NOTE(review): the original comment says "53-bit mode", but bits 9:8 of
  // 0x0D7F are 01b whereas the other 53-bit word (0x027F) uses 10b - confirm
  // which precision-control setting is intended.
  StubRoutines::_fpu_cntrl_wrd_trunc = 0x0D7F;
  // x87: round to nearest, 24-bit precision, exceptions masked
  StubRoutines::_fpu_cntrl_wrd_24    = 0x007F;
  // x87: round to nearest, 64-bit precision, exceptions masked
  StubRoutines::_fpu_cntrl_wrd_64    = 0x037F;
  // MXCSR: round to nearest, exceptions masked (MXCSR has no precision field)
  StubRoutines::_mxcsr_std           = 0x1F80;

  // Note: the following two constants are 80-bit values
  //       layout is critical for correct loading by FPU.

  // Bias for strict fp multiply/divide:
  // 2^(-15360) == 0x03ff 8000 0000 0000 0000
  StubRoutines::_fpu_subnormal_bias1[0]= 0x00000000;
  StubRoutines::_fpu_subnormal_bias1[1]= 0x80000000;
  StubRoutines::_fpu_subnormal_bias1[2]= 0x03ff;

  // Un-Bias for strict fp multiply/divide:
  // 2^(+15360) == 0x7bff 8000 0000 0000 0000
  StubRoutines::_fpu_subnormal_bias2[0]= 0x00000000;
  StubRoutines::_fpu_subnormal_bias2[1]= 0x80000000;
  StubRoutines::_fpu_subnormal_bias2[2]= 0x7bff;
}
|
2945 |
||
2946 |
//--------------------------------------------------------------------------- |
|
2947 |
// Initialization |
|
2948 |
||
2949 |
void generate_initial() { |
|
2950 |
// Generates all stubs and initializes the entry points |
|
2951 |
||
2952 |
//------------------------------------------------------------------------------------------------------------------------ |
|
2953 |
// entry points that exist in all platforms |
|
2954 |
// Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than |
|
2955 |
// the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp. |
|
2956 |
StubRoutines::_forward_exception_entry = generate_forward_exception(); |
|
2957 |
||
2958 |
StubRoutines::_call_stub_entry = |
|
2959 |
generate_call_stub(StubRoutines::_call_stub_return_address); |
|
2960 |
// is referenced by megamorphic call |
|
2961 |
StubRoutines::_catch_exception_entry = generate_catch_exception(); |
|
2962 |
||
2963 |
// These are currently used by Solaris/Intel |
|
2964 |
StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); |
|
2965 |
||
2966 |
StubRoutines::_handler_for_unsafe_access_entry = |
|
2967 |
generate_handler_for_unsafe_access(); |
|
2968 |
||
2969 |
// platform dependent |
|
2970 |
create_control_words(); |
|
2971 |
||
1066 | 2972 |
StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr(); |
2973 |
StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = generate_verify_fpu_cntrl_wrd(); |
|
1 | 2974 |
StubRoutines::_d2i_wrapper = generate_d2i_wrapper(T_INT, |
2975 |
CAST_FROM_FN_PTR(address, SharedRuntime::d2i)); |
|
2976 |
StubRoutines::_d2l_wrapper = generate_d2i_wrapper(T_LONG, |
|
2977 |
CAST_FROM_FN_PTR(address, SharedRuntime::d2l)); |
|
10004
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
2978 |
|
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
2979 |
// Build this early so it's available for the interpreter |
11411 | 2980 |
StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); |
18507
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2981 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2982 |
if (UseCRC32Intrinsics) { |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2983 |
// set the table address before generating the stubs that use it
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2984 |
StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table; |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2985 |
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2986 |
} |
1 | 2987 |
} |
2988 |
||
2989 |
||
2990 |
void generate_all() { |
|
2991 |
// Generates all stubs and initializes the entry points |
|
2992 |
||
2993 |
// These entry points require SharedInfo::stack0 to be set up in non-core builds |
|
2994 |
// and need to be relocatable, so they each fabricate a RuntimeStub internally. |
|
10545 | 2995 |
StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError)); |
2996 |
StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError)); |
|
2997 |
StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call)); |
|
1 | 2998 |
|
2999 |
//------------------------------------------------------------------------------------------------------------------------ |
|
3000 |
// entry points that are platform specific |
|
3001 |
||
3002 |
// support for verify_oop (must happen after universe_init) |
|
3003 |
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); |
|
3004 |
||
3005 |
// arraycopy stubs used by compilers |
|
3006 |
generate_arraycopy_stubs(); |
|
2534 | 3007 |
|
4645
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
3008 |
generate_math_stubs(); |
14132 | 3009 |
|
3010 |
// don't bother generating these AES intrinsic stubs unless global flag is set |
|
3011 |
if (UseAESIntrinsics) { |
|
3012 |
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others |
|
3013 |
||
3014 |
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); |
|
3015 |
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); |
|
3016 |
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); |
|
3017 |
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); |
|
3018 |
} |
|
18740 | 3019 |
|
3020 |
// Safefetch stubs. |
|
3021 |
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, |
|
3022 |
&StubRoutines::_safefetch32_fault_pc, |
|
3023 |
&StubRoutines::_safefetch32_continuation_pc); |
|
3024 |
StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry; |
|
3025 |
StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc; |
|
3026 |
StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc; |
|
1 | 3027 |
} |
3028 |
||
3029 |
||
3030 |
public: |
|
3031 |
// Builds the generator on top of `code` and immediately emits one of the two
// stub groups: generate_all() when `all` is true, generate_initial() otherwise.
StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
  if (!all) {
    generate_initial();
    return;
  }
  generate_all();
}
|
3038 |
}; // end class declaration |
|
3039 |
||
3040 |
||
3041 |
// Entry point for stub generation: `code` is the buffer handed to the
// generator, and `all` selects which group of stubs gets emitted.
void StubGenerator_generate(CodeBuffer* code, bool all) {
  // The StubGenerator constructor does all the work; the object itself is
  // not used afterwards and is destroyed on return.
  StubGenerator stub_generator(code, all);
}