author | ascarpino |
Wed, 17 Jun 2015 17:48:25 -0700 | |
changeset 31404 | 63e8fcd70bfc |
parent 30624 | 2e1803c8a26d |
child 31771 | c9f593020799 |
permissions | -rw-r--r-- |
1 | 1 |
/* |
29325 | 2 |
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. |
1 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
7 |
* published by the Free Software Foundation. |
|
8 |
* |
|
9 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
13 |
* accompanied this code). |
|
14 |
* |
|
15 |
* You should have received a copy of the GNU General Public License version |
|
16 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
17 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 |
* |
|
5547
f4b087cbb361
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
5419
diff
changeset
|
19 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
f4b087cbb361
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
5419
diff
changeset
|
20 |
* or visit www.oracle.com if you need additional information or have any |
f4b087cbb361
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
5419
diff
changeset
|
21 |
* questions. |
1 | 22 |
* |
23 |
*/ |
|
24 |
||
7397 | 25 |
#include "precompiled.hpp" |
14626
0cf4eccf130f
8003240: x86: move MacroAssembler into separate file
twisti
parents:
14132
diff
changeset
|
26 |
#include "asm/macroAssembler.hpp" |
0cf4eccf130f
8003240: x86: move MacroAssembler into separate file
twisti
parents:
14132
diff
changeset
|
27 |
#include "asm/macroAssembler.inline.hpp" |
7397 | 28 |
#include "interpreter/interpreter.hpp" |
29 |
#include "nativeInst_x86.hpp" |
|
30 |
#include "oops/instanceOop.hpp" |
|
13728
882756847a04
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
13391
diff
changeset
|
31 |
#include "oops/method.hpp" |
7397 | 32 |
#include "oops/objArrayKlass.hpp" |
33 |
#include "oops/oop.inline.hpp" |
|
34 |
#include "prims/methodHandles.hpp" |
|
35 |
#include "runtime/frame.inline.hpp" |
|
36 |
#include "runtime/handles.inline.hpp" |
|
37 |
#include "runtime/sharedRuntime.hpp" |
|
38 |
#include "runtime/stubCodeGenerator.hpp" |
|
39 |
#include "runtime/stubRoutines.hpp" |
|
14583
d70ee55535f4
8003935: Simplify the needed includes for using Thread::current()
stefank
parents:
14132
diff
changeset
|
40 |
#include "runtime/thread.inline.hpp" |
7397 | 41 |
#include "utilities/top.hpp" |
42 |
#ifdef COMPILER2 |
|
43 |
#include "opto/runtime.hpp" |
|
44 |
#endif |
|
1 | 45 |
|
46 |
// Declaration and definition of StubGenerator (no .hpp file). |
|
47 |
// For a more detailed description of the stub routine structure |
|
48 |
// see the comment in stubRoutines.hpp |
|
49 |
||
50 |
#define __ _masm-> |
|
1066 | 51 |
#define a__ ((Assembler*)_masm)-> |
1 | 52 |
|
53 |
#ifdef PRODUCT |
|
54 |
#define BLOCK_COMMENT(str) /* nothing */ |
|
55 |
#else |
|
56 |
#define BLOCK_COMMENT(str) __ block_comment(str) |
|
57 |
#endif |
|
58 |
||
59 |
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") |
|
60 |
||
61 |
const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions |
|
62 |
const int FPU_CNTRL_WRD_MASK = 0xFFFF; |
|
63 |
||
64 |
// ------------------------------------------------------------------------------------------------------------------------- |
|
65 |
// Stub Code definitions |
|
66 |
||
67 |
// Runtime helper called from the handler_for_unsafe_access stub.
// Flags a pending unsafe-access error on the current JavaThread and returns
// the address of the instruction following the saved exception pc.
static address handle_unsafe_access() {
  JavaThread* const current = JavaThread::current();

  // The instruction at the saved exception pc is not re-executed: treating it
  // as a no-op (the load yields garbage) is acceptable, so execution simply
  // continues at the instruction after it.
  const address faulting_pc = current->saved_exception_pc();
  const address resume_pc   = Assembler::locate_next_instruction(faulting_pc);

  // Ask for the asynchronous exception to be raised.
  current->set_pending_unsafe_access_error();

  return resume_pc;
}
|
81 |
||
82 |
class StubGenerator: public StubCodeGenerator { |
|
83 |
private: |
|
84 |
||
85 |
#ifdef PRODUCT |
|
18073
f02460441ddc
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
17622
diff
changeset
|
86 |
#define inc_counter_np(counter) ((void)0) |
1 | 87 |
#else |
88 |
void inc_counter_np_(int& counter) { |
|
1066 | 89 |
__ incrementl(ExternalAddress((address)&counter)); |
1 | 90 |
} |
91 |
#define inc_counter_np(counter) \ |
|
92 |
BLOCK_COMMENT("inc_counter " #counter); \ |
|
93 |
inc_counter_np_(counter); |
|
94 |
#endif //PRODUCT |
|
95 |
||
96 |
// In non-PRODUCT builds, emits an increment of the SharedRuntime array-copy
// counter that corresponds to element type t. Any type other than the five
// listed below hits ShouldNotReachHere(). In PRODUCT builds this emits nothing.
void inc_copy_counter_np(BasicType t) {
#ifndef PRODUCT
  switch (t) {
    case T_BYTE:   inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr);  break;
    case T_SHORT:  inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); break;
    case T_INT:    inc_counter_np(SharedRuntime::_jint_array_copy_ctr);   break;
    case T_LONG:   inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);  break;
    case T_OBJECT: inc_counter_np(SharedRuntime::_oop_array_copy_ctr);    break;
    default:       ShouldNotReachHere();
  }
#endif // !PRODUCT
}
|
108 |
||
109 |
//------------------------------------------------------------------------------------------------------------------------ |
|
110 |
// Call stubs are used to call Java from C |
|
111 |
// |
|
112 |
// [ return_from_Java ] <--- rsp |
|
113 |
// [ argument word n ] |
|
114 |
// ... |
|
115 |
// -N [ argument word 1 ] |
|
116 |
// -7 [ Possible padding for stack alignment ] |
|
117 |
// -6 [ Possible padding for stack alignment ] |
|
118 |
// -5 [ Possible padding for stack alignment ] |
|
119 |
// -4 [ mxcsr save ] <--- rsp_after_call |
|
120 |
// -3 [ saved rbx ] |
|
121 |
// -2 [ saved rsi ] |
|
122 |
// -1 [ saved rdi ] |
|
123 |
// 0 [ saved rbp ] <--- rbp |
|
124 |
// 1 [ return address ] |
|
125 |
// 2 [ ptr. to call wrapper ] |
|
126 |
// 3 [ result ] |
|
127 |
// 4 [ result_type ] |
|
128 |
// 5 [ method ] |
|
129 |
// 6 [ entry_point ] |
|
130 |
// 7 [ parameters ] |
|
131 |
// 8 [ parameter_size ] |
|
132 |
// 9 [ thread ] |
|
133 |
||
134 |
||
135 |
// Generates the call stub and returns its entry address.
// return_address is set to the pc immediately following the emitted
// "call rax" instruction (the point Java code returns to).
address generate_call_stub(address& return_address) {
  StubCodeMark mark(this, "StubRoutines", "call_stub");
  address start = __ pc();

  // Frame layout, all relative to rbp once enter() has run.
  assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code");

  // Four words below rbp hold: mxcsr save, rbx, rsi, rdi.
  const int     locals_count_in_bytes  (4*wordSize);
  const bool    sse_save = UseSSE > 0;

  const Address rsp_after_call(rbp, -4 * wordSize); // must match generate_catch_exception()
  const Address mxcsr_save    (rbp, -4 * wordSize); // shares the slot rsp_after_call points at
  const Address saved_rbx     (rbp, -3 * wordSize);
  const Address saved_rsi     (rbp, -2 * wordSize);
  const Address saved_rdi     (rbp, -1 * wordSize);

  // Incoming arguments, above the saved rbp / return address / call wrapper.
  const Address result        (rbp,  3 * wordSize);
  const Address result_type   (rbp,  4 * wordSize);
  const Address method        (rbp,  5 * wordSize);
  const Address entry_point   (rbp,  6 * wordSize);
  const Address parameters    (rbp,  7 * wordSize);
  const Address parameter_size(rbp,  8 * wordSize);
  const Address thread        (rbp,  9 * wordSize); // must match generate_catch_exception()

  // ---- prologue: build the frame and carve out stack space ----
  __ enter();
  __ movptr(rcx, parameter_size);                   // number of parameters
  __ shlptr(rcx, Interpreter::logStackElementSize); // ... scaled to bytes
  __ addptr(rcx, locals_count_in_bytes);            // plus the register save area
  __ subptr(rsp, rcx);
  __ andptr(rsp, -(StackAlignmentInBytes));         // keep the stack aligned

  // rdi, rsi and rbx are preserved for the C caller.
  __ movptr(saved_rdi, rdi);
  __ movptr(saved_rsi, rsi);
  __ movptr(saved_rbx, rbx);

  // Initial value for the k1 mask register when UseAVX > 2.
  if (UseAVX > 2) {
    __ movl(rbx, 0xffff);
    __ kmovdl(k1, rbx);
  }

  // Save the caller's %mxcsr; load the standard value unless the
  // control/mask bits already match it.
  if (sse_save) {
    Label L_mxcsr_ok;
    __ stmxcsr(mxcsr_save);
    __ movl(rax, mxcsr_save);
    __ andl(rax, MXCSR_MASK);    // compare control and mask bits only
    ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
    __ cmp32(rax, mxcsr_std);
    __ jcc(Assembler::equal, L_mxcsr_ok);
    __ ldmxcsr(mxcsr_std);
    __ bind(L_mxcsr_ok);
  }

  // Load the standard x87 control word.
  __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));

#ifdef ASSERT
  // The stub must not be entered while an exception is pending.
  {
    Label L_no_pending;
    __ movptr(rcx, thread);
    __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
    __ jcc(Assembler::equal, L_no_pending);
    __ stop("StubRoutines::call_stub: entered with pending exception");
    __ bind(L_no_pending);
  }
#endif

  // ---- copy the Java parameters onto the stack (skipped when there are none) ----
  BLOCK_COMMENT("pass parameters if any");
  Label parameters_done;
  Label loop;
  __ movl(rcx, parameter_size);  // remaining parameter count
  __ testl(rcx, rcx);
  __ jcc(Assembler::zero, parameters_done);

  // Parameters are copied in reverse order (receiver last), which inverts
  // the argument order:
  //   source: rdx[rcx: N-1..0]
  //   dest:   rsp[rbx: 0..N-1]
  __ movptr(rdx, parameters);          // base of the source parameter array
  __ xorptr(rbx, rbx);                 // destination index starts at 0

  __ BIND(loop);

  // load one parameter ...
  __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize));
  // ... and store it at the next destination slot
  __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
                    Interpreter::expr_offset_in_bytes(0)), rax);
  __ increment(rbx);
  __ decrement(rcx);
  __ jcc(Assembler::notZero, loop);

  // ---- call into Java ----
  __ BIND(parameters_done);
  __ movptr(rbx, method);           // Method*
  __ movptr(rax, entry_point);      // entry point to call
  __ mov(rsi, rsp);                 // sender sp
  BLOCK_COMMENT("call Java function");
  __ call(rax);

  BLOCK_COMMENT("call_stub_return_address:");
  return_address = __ pc();

#ifdef COMPILER2
  {
    Label L_skip;
    if (UseSSE >= 2) {
      __ verify_FPU(0, "call_stub_return");
    } else {
      // Free x87 registers 1..7.
      for (int st = 1; st < 8; ++st) {
        __ ffree(st);
      }

      // With UseSSE <= 1 a double result is left on the x87 top of stack.
      __ movl(rsi, result_type);
      __ cmpl(rsi, T_DOUBLE);
      __ jcc(Assembler::equal, L_skip);
      if (UseSSE == 0) {
        // With UseSSE == 0 a float result is left on the x87 top of stack too.
        __ cmpl(rsi, T_FLOAT);
        __ jcc(Assembler::equal, L_skip);
      }
      __ ffree(0);
    }
    __ BIND(L_skip);
  }
#endif // COMPILER2

  // ---- store the result according to its type ----
  // Anything that is not T_LONG, T_FLOAT or T_DOUBLE is handled as T_INT.
  Label is_long, is_float, is_double, exit;
  __ movptr(rdi, result);
  __ movl(rsi, result_type);
  __ cmpl(rsi, T_LONG);
  __ jcc(Assembler::equal, is_long);
  __ cmpl(rsi, T_FLOAT);
  __ jcc(Assembler::equal, is_float);
  __ cmpl(rsi, T_DOUBLE);
  __ jcc(Assembler::equal, is_double);

  // T_INT (default) case
  __ movl(Address(rdi, 0), rax);
  __ BIND(exit);

  // The x87 stack is expected to be empty here.
  __ verify_FPU(0, "generate_call_stub");

  // ---- epilogue ----
  // Drop the parameters: rsp now points at the register save area.
  __ lea(rsp, rsp_after_call);

  // Restore the caller's %mxcsr.
  if (sse_save) {
    __ ldmxcsr(mxcsr_save);
  }

  // Restore rbx, rsi and rdi, then release the save area.
  __ movptr(rbx, saved_rbx);
  __ movptr(rsi, saved_rsi);
  __ movptr(rdi, saved_rdi);
  __ addptr(rsp, locals_count_in_bytes);

  __ pop(rbp);
  __ ret(0);

  // ---- out-of-line result stores for the non-T_INT types ----
  __ BIND(is_long);
  __ movl(Address(rdi, 0 * wordSize), rax);
  __ movl(Address(rdi, 1 * wordSize), rdx);
  __ jmp(exit);

  __ BIND(is_float);
  // The result is taken from xmm0 when UseSSE >= 1, else from the x87 stack.
  if (UseSSE >= 1) {
    __ movflt(Address(rdi, 0), xmm0);
  } else {
    __ fstp_s(Address(rdi, 0));
  }
  __ jmp(exit);

  __ BIND(is_double);
  // The result is taken from xmm0 when UseSSE >= 2, else from the x87 stack.
  if (UseSSE >= 2) {
    __ movdbl(Address(rdi, 0), xmm0);
  } else {
    __ fstp_d(Address(rdi, 0));
  }
  __ jmp(exit);

  return start;
}
|
330 |
||
331 |
||
332 |
//------------------------------------------------------------------------------------------------------------------------ |
|
333 |
// Return point for a Java call if there's an exception thrown in Java code. |
|
334 |
// The exception is caught and transformed into a pending exception stored in |
|
335 |
// JavaThread that can be tested from within the VM. |
|
336 |
// |
|
337 |
// Note: Usually the parameters are removed by the callee. In case of an exception |
|
338 |
// crossing an activation frame boundary, that is not the case if the callee |
|
339 |
// is compiled code => need to setup the rsp. |
|
340 |
// |
|
341 |
// rax: exception oop |
|
342 |
||
343 |
// Generates the catch_exception stub: stores the exception oop in rax into
// the thread's pending-exception field, records this file and line in the
// thread's exception file/line fields, then jumps to the call stub's return
// address.
address generate_catch_exception() {
  StubCodeMark mark(this, "StubRoutines", "catch_exception");

  // Offsets must agree with generate_call_stub().
  // rsp_after_call is not referenced by the code emitted below.
  const Address rsp_after_call(rbp, -4 * wordSize);
  const Address thread        (rbp,  9 * wordSize);

  address start = __ pc();

  // Load the thread from the call stub's frame.
  __ movptr(rcx, thread);

#ifdef ASSERT
  // The thread in the frame must be the one get_thread() reports.
  {
    Label L_same_thread;
    __ get_thread(rbx);
    __ cmpptr(rbx, rcx);
    __ jcc(Assembler::equal, L_same_thread);
    __ stop("StubRoutines::catch_exception: threads must correspond");
    __ bind(L_same_thread);
  }
#endif

  // Record the pending exception together with file and line.
  __ verify_oop(rax);
  __ movptr(Address(rcx, Thread::pending_exception_offset()), rax);
  __ lea(Address(rcx, Thread::exception_file_offset()),
         ExternalAddress((address)__FILE__));
  __ movl(Address(rcx, Thread::exception_line_offset()), __LINE__);

  // Finish by returning to the VM through the call stub's return path.
  assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
  __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));

  return start;
}
|
373 |
||
374 |
||
375 |
//------------------------------------------------------------------------------------------------------------------------ |
|
376 |
// Continuation point for runtime calls returning with a pending exception. |
|
377 |
// The pending exception check happened in the runtime or native call stub. |
|
378 |
// The pending exception in Thread is converted into a Java-level exception. |
|
379 |
// |
|
380 |
// Contract with Java-level exception handlers: |
|
5046 | 381 |
// rax: exception |
1 | 382 |
// rdx: throwing pc |
383 |
// |
|
384 |
// NOTE: At entry of this stub, exception-pc must be on stack !! |
|
385 |
||
386 |
// Generates the forward_exception stub. The emitted code looks up the
// exception handler for the return address on top of the stack, moves the
// thread's pending exception into rax (clearing the pending field), pops the
// return address into rdx and jumps to the handler.
address generate_forward_exception() {
  StubCodeMark mark(this, "StubRoutines", "forward exception");
  address start = __ pc();

  // Register assignment used throughout this stub.
  const Register thread        = rcx;
  const Register exception_oop = rax;
  const Register handler_addr  = rbx;
  const Register exception_pc  = rdx;

  // On entry sp points at the return address into Java (interpreted or
  // compiled) code; that return address serves as the throwing pc.
  //
  // Arguments pushed for the runtime call are still on the stack, but the
  // exception handler resets the stack pointer, so they are ignored here.
  // Any result left in registers is ignored as well.

#ifdef ASSERT
  // This stub may only run while an exception is pending.
  {
    Label L_pending;
    __ get_thread(thread);
    __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
    __ jcc(Assembler::notEqual, L_pending);
    __ stop("StubRoutines::forward exception: no pending exception (1)");
    __ bind(L_pending);
  }
#endif

  // Find the handler for the return address; the result ends up in rbx.
  __ get_thread(thread);
  __ movptr(exception_pc, Address(rsp, 0));
  BLOCK_COMMENT("call exception_handler_for_return_address");
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc);
  __ mov(handler_addr, rax);

  // Set up rax and rdx, remove the return address from the stack and clear
  // the thread's pending exception.
  __ get_thread(thread);
  __ pop(exception_pc);
  __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
  __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);

#ifdef ASSERT
  // The exception just loaded must be non-null.
  {
    Label L_have_exception;
    __ testptr(exception_oop, exception_oop);
    __ jcc(Assembler::notEqual, L_have_exception);
    __ stop("StubRoutines::forward exception: no pending exception (2)");
    __ bind(L_have_exception);
  }
#endif

  // Check that rax really holds a valid exception oop.
  __ verify_oop(exception_oop);

  // Continue at the handler (the return address is already removed):
  //   rax: exception
  //   rbx: exception handler
  //   rdx: throwing pc
  __ jmp(handler_addr);

  return start;
}
|
449 |
||
450 |
||
451 |
//---------------------------------------------------------------------------------------------------- |
|
452 |
// Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest) |
|
453 |
// |
|
454 |
// xchg exists as far back as 8086, lock needed for MP only |
|
455 |
// Stack layout immediately after call: |
|
456 |
// |
|
457 |
// 0 [ret addr ] <--- rsp |
|
458 |
// 1 [ ex ] |
|
459 |
// 2 [ dest ] |
|
460 |
// |
|
461 |
// Result: *dest <- ex, return (old *dest) |
|
462 |
// |
|
463 |
// Note: win32 does not currently use this code |
|
464 |
||
465 |
// Generates the atomic_xchg stub: exchanges the 32-bit value at *dest with
// the supplied exchange value and leaves the previous *dest in rax.
address generate_atomic_xchg() {
  StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
  address start = __ pc();

  // rdx is saved with a push below, so the two stack arguments sit one word
  // further from rsp than in the layout immediately after the call.
  const Address exchange_arg(rsp, 2 * wordSize);
  const Address dest_arg    (rsp, 3 * wordSize);

  __ push(rdx);
  __ movl(rax, exchange_arg);
  __ movptr(rdx, dest_arg);
  __ xchgl(rax, Address(rdx, 0));
  __ pop(rdx);
  __ ret(0);

  return start;
}
|
480 |
||
481 |
//---------------------------------------------------------------------------------------------------- |
|
482 |
// Support for void verify_mxcsr() |
|
483 |
// |
|
484 |
// This routine is used with -Xcheck:jni to verify that native |
|
485 |
// JNI code does not return to Java code without restoring the |
|
486 |
// MXCSR register to our expected state. |
|
487 |
||
488 |
||
489 |
// Generates the verify_mxcsr stub. When CheckJNICalls is set and UseSSE > 0,
// the emitted code compares the masked MXCSR against the standard value and,
// on mismatch, issues a warning and reloads the standard value. Otherwise the
// stub consists of a bare return.
address generate_verify_mxcsr() {
  StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
  address start = __ pc();

  // Scratch word on top of the stack, valid once it has been allocated below.
  const Address mxcsr_scratch(rsp, 0);

  const bool emit_check = CheckJNICalls && UseSSE > 0;
  if (emit_check) {
    Label L_done;
    ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());

    __ push(rax);
    __ subptr(rsp, wordSize);      // reserve the scratch word
    __ stmxcsr(mxcsr_scratch);
    __ movl(rax, mxcsr_scratch);
    __ andl(rax, MXCSR_MASK);
    __ cmp32(rax, mxcsr_std);
    __ jcc(Assembler::equal, L_done);

    __ warn("MXCSR changed by native JNI code.");

    __ ldmxcsr(mxcsr_std);

    __ bind(L_done);
    __ addptr(rsp, wordSize);      // release the scratch word
    __ pop(rax);
  }

  __ ret(0);

  return start;
}
|
519 |
||
520 |
||
521 |
//--------------------------------------------------------------------------- |
|
522 |
// Support for void verify_fpu_cntrl_wrd() |
|
523 |
// |
|
524 |
// This routine is used with -Xcheck:jni to verify that native |
|
525 |
// JNI code does not return to Java code without restoring the |
|
526 |
// FP control word to our expected state. |
|
527 |
||
528 |
// Generates the stub that verifies the x87 control word. When CheckJNICalls
// is set, the emitted code compares the masked control word against the
// standard value and, on mismatch, issues a warning and reloads the standard
// value. Otherwise the stub consists of a bare return.
address generate_verify_fpu_cntrl_wrd() {
  StubCodeMark mark(this, "StubRoutines", "verify_spcw");
  address start = __ pc();

  // Scratch word on top of the stack, valid once it has been allocated below.
  const Address cw_scratch(rsp, 0);

  if (CheckJNICalls) {
    Label L_done;

    __ push(rax);
    __ subptr(rsp, wordSize);      // reserve the scratch word
    __ fnstcw(cw_scratch);
    __ movl(rax, cw_scratch);
    __ andl(rax, FPU_CNTRL_WRD_MASK);
    ExternalAddress fpu_std(StubRoutines::addr_fpu_cntrl_wrd_std());
    __ cmp32(rax, fpu_std);
    __ jcc(Assembler::equal, L_done);

    __ warn("Floating point control word changed by native JNI code.");

    __ fldcw(fpu_std);

    __ bind(L_done);
    __ addptr(rsp, wordSize);      // release the scratch word
    __ pop(rax);
  }

  __ ret(0);

  return start;
}
|
558 |
||
559 |
//--------------------------------------------------------------------------- |
|
560 |
// Wrapper for slow-case handling of double-to-integer conversion |
|
561 |
// d2i or f2i fast case failed either because it is nan or because |
|
562 |
// of under/overflow. |
|
563 |
// Input: FPU TOS: float value |
|
564 |
// Output: rax, (rdx): integer (long) result |
|
565 |
||
566 |
// Generates a wrapper that calls the C conversion routine fcn on the double
// taken from the x87 top of stack, preserving rbx, rcx, rsi, rdi, rbp and the
// FPU state around the call. t only selects the block comment that is emitted
// (T_INT or T_LONG).
address generate_d2i_wrapper(BasicType t, address fcn) {
  StubCodeMark mark(this, "StubRoutines", "d2i_wrapper");
  address start = __ pc();

  // Word offsets from rsp once push_FPU_state() has run.
  enum layout { FPUState_off         = 0,
                rbp_off              = FPUStateSizeInWords,
                rdi_off,
                rsi_off,
                rcx_off,
                rbx_off,
                saved_argument_off,
                saved_argument_off2, // upper half of the double
                framesize
  };

  assert(FPUStateSizeInWords == 27, "update stack layout");

  // Park the incoming double on the stack so it survives push_FPU_state().
  __ subptr(rsp, wordSize * 2);
  __ fstp_d(Address(rsp, 0));

  // Save CPU registers, then the FPU state.
  __ push(rbx);
  __ push(rcx);
  __ push(rsi);
  __ push(rdi);
  __ push(rbp);
  __ push_FPU_state();

  // push_FPU_state() resets the FP top of stack, so reload the saved double.
  __ fld_d(Address(rsp, saved_argument_off * wordSize));

  // Place the double on the stack as the outgoing argument.
  __ subptr(rsp, wordSize*2);
  __ fst_d(Address(rsp, 0));

  // Clear the FPU stack before entering C code.
  __ empty_FPU_stack();

  // Call the C routine.
  switch (t) {
    case T_INT:  BLOCK_COMMENT("SharedRuntime::d2i"); break;
    case T_LONG: BLOCK_COMMENT("SharedRuntime::d2l"); break;
    default:     break;
  }
  __ call_VM_leaf( fcn, 2 );

  // Restore FPU state and CPU registers in reverse order.
  __ pop_FPU_state();
  __ pop(rbp);
  __ pop(rdi);
  __ pop(rsi);
  __ pop(rcx);
  __ pop(rbx);
  __ addptr(rsp, wordSize * 2);   // drop the parked double

  __ ret(0);

  return start;
}
|
625 |
||
626 |
||
627 |
//--------------------------------------------------------------------------- |
|
628 |
// The following routine generates a subroutine to throw an asynchronous |
|
629 |
// UnknownError when an unsafe access gets a fault that could not be |
|
630 |
// reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.) |
|
631 |
// Generates the handler_for_unsafe_access stub: calls handle_unsafe_access()
// with all registers preserved and then "returns" to the address that call
// produced.
address generate_handler_for_unsafe_access() {
  StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
  address start = __ pc();

  // Slot just above the pusha() register block; it is the word pushed first
  // below and is the word the final ret consumes.
  const Address resume_slot(rsp, RegisterImpl::number_of_registers * BytesPerWord);

  __ push(0);                       // placeholder for the address to resume at
  __ pusha();                       // save all registers
  BLOCK_COMMENT("call handle_unsafe_access");
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access)));
  __ movptr(resume_slot, rax);      // fill the placeholder with the returned address
  __ popa();
  __ ret(0);                        // continue at the returned address

  return start;
}
|
646 |
||
647 |
||
648 |
//---------------------------------------------------------------------------------------------------- |
|
649 |
// Non-destructive plausibility checks for oops |
|
650 |
||
651 |
// Generates the verify_oop stub: a plausibility check of the oop passed on
// the stack. A NULL oop is accepted. Otherwise the oop must satisfy the
// Universe mask/bits test and have a non-zero klass word; on failure
// MacroAssembler::debug32 is called.
address generate_verify_oop() {
  StubCodeMark mark(this, "StubRoutines", "verify_oop");
  address start = __ pc();

  // Stack as seen after EFLAGS and rdx have been pushed below:
  //
  //  [tos    ]: saved rdx
  //  [tos + 1]: saved EFLAGS
  //  [tos + 2]: return address
  //  [tos + 3]: char* error message
  //  [tos + 4]: oop   object to verify
  //  [tos + 5]: saved rax - saved by caller and bashed
  const Address obj_arg         (rsp, 4 * wordSize);
  const Address caller_saved_rax(rsp, 5 * wordSize);

  Label L_ok, L_bad;

  __ pushf();
  __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
  __ push(rdx);                                // save rdx

  // A NULL object is fine.
  __ movptr(rax, obj_arg);                     // load the object
  __ testptr(rax, rax);
  __ jcc(Assembler::zero, L_ok);

  // The oop must lie in the expected area of memory.
  const int oop_mask = Universe::verify_oop_mask();
  const int oop_bits = Universe::verify_oop_bits();
  __ mov(rdx, rax);
  __ andptr(rdx, oop_mask);
  __ cmpptr(rdx, oop_bits);
  __ jcc(Assembler::notZero, L_bad);

  // The klass must be 'reasonable', i.e. not zero.
  __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // load the klass
  __ testptr(rax, rax);
  __ jcc(Assembler::zero, L_bad);              // a NULL klass means a broken oop

  // Success: restore state and return, popping the three argument words.
  __ bind(L_ok);
  __ movptr(rax, caller_saved_rax);            // restore rax
  __ pop(rdx);                                 // restore rdx
  __ popf();                                   // restore EFLAGS
  __ ret(3 * wordSize);

  // Failure: restore state, then hand over to the debug routine.
  __ bind(L_bad);
  __ movptr(rax, caller_saved_rax);            // restore rax
  __ pop(rdx);                                 // restore rdx
  __ popf();                                   // take saved EFLAGS off the stack -- will be ignored
  __ pusha();                                  // push registers (eip = return address & msg are already pushed)
  BLOCK_COMMENT("call MacroAssembler::debug");
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  __ popa();
  __ ret(3 * wordSize);                        // pop the three argument words
  return start;
}
|
705 |
||
706 |
// |
|
707 |
// Generate pre-barrier for array stores |
|
708 |
// |
|
709 |
// Input: |
|
710 |
// start - starting address |
|
3262
30d1c247fc25
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
2534
diff
changeset
|
711 |
// count - element count |
8498 | 712 |
void gen_write_ref_array_pre_barrier(Register start, Register count, bool uninitialized_target) {
  // Emits whatever code has to run *before* 'count' references are stored
  // starting at address 'start'. What gets emitted is selected by the kind
  // of the heap's barrier set.
  assert_different_registers(start, count);
  BarrierSet* const heap_bs = Universe::heap()->barrier_set();
  switch (heap_bs->kind()) {
    case BarrierSet::G1SATBCTLogging:
      // With G1, the call is skipped when we statically know that the
      // target is uninitialized.
      if (uninitialized_target) {
        break;
      }
      __ pusha();                      // push registers
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
                      start, count);
      __ popa();                       // pop them again after the leaf call
      break;

    case BarrierSet::CardTableModRef:
    case BarrierSet::CardTableExtension:
    case BarrierSet::ModRef:
      // Nothing is emitted for these barrier set kinds.
      break;

    default:
      ShouldNotReachHere();
  }
}
|
734 |
||
735 |
||
736 |
// |
|
737 |
// Generate a post-barrier for an array store |
|
738 |
// |
|
739 |
// start - starting address |
|
740 |
// count - element count |
|
741 |
// |
|
742 |
// The two input registers are overwritten. |
|
743 |
// |
|
744 |
void gen_write_ref_array_post_barrier(Register start, Register count) {
  // Emits the code that follows a bulk reference store of 'count' elements
  // beginning at 'start'. The card-table variant overwrites both registers.
  assert_different_registers(start, count);
  BarrierSet* bs = Universe::heap()->barrier_set();
  switch (bs->kind()) {
    case BarrierSet::G1SATBCTLogging: {
      // Hand the range to the runtime; all registers are pushed around the call.
      __ pusha();                      // push registers
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post),
                      start, count);
      __ popa();
      break;
    }

    case BarrierSet::CardTableModRef:
    case BarrierSet::CardTableExtension: {
      CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
      assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

      // 'end' aliases 'count': it first receives the address of the last
      // element (start + count*wordSize - wordSize) and is then reduced to
      // the distance, in cards, between the last and the first card.
      const Register end = count;
      assert_different_registers(start, end);

      // Card entry addressed as byte_map_base + start + count. Building the
      // Address emits no code, so it is prepared ahead of the loop.
      intptr_t disp = (intptr_t) ct->byte_map_base;
      Address cardtable(start, count, Address::times_1, disp);
      Label L_loop;

      __ lea(end, Address(start, count, Address::times_ptr, -wordSize));
      __ shrptr(start, CardTableModRefBS::card_shift);   // address --> card index
      __ shrptr(end,   CardTableModRefBS::card_shift);   // address --> card index
      __ subptr(end, start);                             // end --> count
    __ BIND(L_loop);
      __ movb(cardtable, 0);                             // write 0 into the card entry
      __ decrement(count);
      __ jcc(Assembler::greaterEqual, L_loop);           // runs for count, count-1, ..., 0
      break;
    }

    case BarrierSet::ModRef:
      break;

    default:
      ShouldNotReachHere();
  }
}
|
786 |
||
1437 | 787 |
|
788 |
// Copy 64 bytes chunks |
|
789 |
// |
|
790 |
// Inputs: |
|
791 |
// from - source array address |
|
792 |
// to_from - destination array address - from |
|
793 |
// qword_count - 8-byte element count (counted down toward zero by the copy loops; not a negative index)
|
794 |
// |
|
795 |
void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
  // Emits a forward copy of 'qword_count' 8-byte elements using XMM/YMM/ZMM
  // registers. The destination is addressed as (from + to_from), so only
  // 'from' is advanced. On exit 'from' points past the copied data and
  // 'qword_count' has been consumed. Which vector registers are used
  // depends on UseUnalignedLoadStores / UseAVX (see the branches below).
  assert( UseSSE >= 2, "supported cpu only" );
  Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;

  // Copy 64-byte chunks. Enter at the loop test so that a count smaller
  // than 8 qwords never executes the 64-byte body.
  __ jmpb(L_copy_64_bytes);
  __ align(OptoLoopAlignment);
__ BIND(L_copy_64_bytes_loop);

  if (UseUnalignedLoadStores) {
    if (UseAVX > 2) {
      // One 512-bit load/store moves the whole 64-byte chunk.
      __ evmovdqu(xmm0, Address(from, 0), Assembler::AVX_512bit);
      __ evmovdqu(Address(from, to_from, Address::times_1, 0), xmm0, Assembler::AVX_512bit);
    } else if (UseAVX == 2) {
      // Two 32-byte moves.
      __ vmovdqu(xmm0, Address(from,  0));
      __ vmovdqu(Address(from, to_from, Address::times_1,  0), xmm0);
      __ vmovdqu(xmm1, Address(from, 32));
      __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
    } else {
      // Four 16-byte moves.
      __ movdqu(xmm0, Address(from, 0));
      __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
      __ movdqu(xmm1, Address(from, 16));
      __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
      __ movdqu(xmm2, Address(from, 32));
      __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
      __ movdqu(xmm3, Address(from, 48));
      __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
    }
  } else {
    // Eight 8-byte moves through xmm0..xmm7.
    __ movq(xmm0, Address(from, 0));
    __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
    __ movq(xmm1, Address(from, 8));
    __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
    __ movq(xmm2, Address(from, 16));
    __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
    __ movq(xmm3, Address(from, 24));
    __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
    __ movq(xmm4, Address(from, 32));
    __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
    __ movq(xmm5, Address(from, 40));
    __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
    __ movq(xmm6, Address(from, 48));
    __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
    __ movq(xmm7, Address(from, 56));
    __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
  }

  // 'from' is an address: use the pointer-width add, as mmx_copy_forward does.
  __ addptr(from, 64);
__ BIND(L_copy_64_bytes);
  __ subl(qword_count, 8);
  __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);

  if (UseUnalignedLoadStores && (UseAVX == 2)) {
    // clean upper bits of YMM registers
    // NOTE(review): the UseAVX > 2 path above is not covered by this
    // cleanup -- confirm that is intended.
    __ vpxor(xmm0, xmm0);
    __ vpxor(xmm1, xmm1);
  }
  __ addl(qword_count, 8);            // undo the last subtraction: 0..7 qwords remain
  __ jccb(Assembler::zero, L_exit);
  //
  // length is too short, just copy qwords
  //
__ BIND(L_copy_8_bytes);
  __ movq(xmm0, Address(from, 0));
  __ movq(Address(from, to_from, Address::times_1), xmm0);
  __ addptr(from, 8);
  __ decrement(qword_count);
  __ jcc(Assembler::greater, L_copy_8_bytes);
__ BIND(L_exit);
}
|
864 |
||
1 | 865 |
// Copy 64 bytes chunks |
866 |
// |
|
867 |
// Inputs: |
|
868 |
// from - source array address |
|
869 |
// to_from - destination array address - from |
|
870 |
// qword_count - 8-byte element count (counted down toward zero by the copy loops; not a negative index)
|
871 |
// |
|
872 |
void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
  // Emits a forward copy of 'qword_count' 8-byte elements through the MMX
  // registers mmx0..mmx7. The destination is addressed as (from + to_from),
  // so only 'from' is advanced. The sequence ends with emms.
  assert( VM_Version::supports_mmx(), "supported cpu only" );
  Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;

  // Copy 64-byte chunks; jump straight to the loop test first.
  __ jmpb(L_copy_64_bytes);
  __ align(OptoLoopAlignment);
__ BIND(L_copy_64_bytes_loop);
  // Loads and stores are interleaved; the emission order below is kept
  // exactly as it was.
  __ movq(mmx0, Address(from, 0));
  __ movq(mmx1, Address(from, 8));
  __ movq(mmx2, Address(from, 16));
  __ movq(Address(from, to_from, Address::times_1, 0), mmx0);
  __ movq(mmx3, Address(from, 24));
  __ movq(Address(from, to_from, Address::times_1, 8), mmx1);
  __ movq(mmx4, Address(from, 32));
  __ movq(Address(from, to_from, Address::times_1, 16), mmx2);
  __ movq(mmx5, Address(from, 40));
  __ movq(Address(from, to_from, Address::times_1, 24), mmx3);
  __ movq(mmx6, Address(from, 48));
  __ movq(Address(from, to_from, Address::times_1, 32), mmx4);
  __ movq(mmx7, Address(from, 56));
  __ movq(Address(from, to_from, Address::times_1, 40), mmx5);
  __ movq(Address(from, to_from, Address::times_1, 48), mmx6);
  __ movq(Address(from, to_from, Address::times_1, 56), mmx7);
  __ addptr(from, 64);
__ BIND(L_copy_64_bytes);
  __ subl(qword_count, 8);
  __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
  __ addl(qword_count, 8);            // undo the last subtraction: 0..7 qwords remain
  __ jccb(Assembler::zero, L_exit);
  //
  // length is too short, just copy qwords
  //
__ BIND(L_copy_8_bytes);
  __ movq(mmx0, Address(from, 0));
  __ movq(Address(from, to_from, Address::times_1), mmx0);
  __ addptr(from, 8);
  __ decrement(qword_count);
  __ jcc(Assembler::greater, L_copy_8_bytes);
__ BIND(L_exit);
  __ emms();
}
|
913 |
||
914 |
address generate_disjoint_copy(BasicType t, bool aligned,
                               Address::ScaleFactor sf,
                               address* entry, const char *name,
                               bool dest_uninitialized = false) {
  // Generates a forward (low-to-high) array copy stub for element type 't'
  // with element scale 'sf' and returns the stub's start address. When
  // 'entry' is non-NULL it receives the pc just after the arguments have
  // been loaded into registers. The generated code returns 0 in rax.

  // Register roles inside the stub.
  const Register from     = rsi;  // source array address
  const Register to       = rdi;  // destination array address
  const Register count    = rcx;  // elements count
  const Register to_from  = to;   // (to - from)
  const Register saved_to = rdx;  // saved destination array address

  // Element-type predicates used to select what gets emitted.
  const bool is_oop      = (t == T_OBJECT);
  const bool is_byte     = (t == T_BYTE);
  const bool is_sub_word = (t == T_BYTE || t == T_SHORT);

  // log2(elements per pointer-sized word); (1 << shift) elements make 4 bytes.
  const int shift = Address::times_ptr - sf;

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
  Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes;

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);
  // Arguments are read relative to rsp, past the 12 bytes saved above
  // (presumably rbp from enter(), rsi, rdi) and the return address.
  __ movptr(from , Address(rsp, 12+ 4));
  __ movptr(to   , Address(rsp, 12+ 8));
  __ movl(count, Address(rsp, 12+ 12));

  if (entry != NULL) {
    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  if (is_oop) {
    // Nothing to copy and no barriers needed for an empty range.
    __ testl(count, count);
    __ jcc(Assembler::zero, L_0_count);
    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
    __ mov(saved_to, to);          // save 'to'
  }

  __ subptr(to, from); // to --> to_from
  __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
  __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && is_sub_word) {
    // align source address at 4 bytes address boundary
    if (is_byte) {
      // One byte misalignment happens only for byte arrays
      __ testl(from, 1);
      __ jccb(Assembler::zero, L_skip_align1);
      __ movb(rax, Address(from, 0));
      __ movb(Address(from, to_from, Address::times_1, 0), rax);
      __ increment(from);
      __ decrement(count);
    __ BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    __ testl(from, 2);
    __ jccb(Assembler::zero, L_skip_align2);
    __ movw(rax, Address(from, 0));
    __ movw(Address(from, to_from, Address::times_1, 0), rax);
    __ addptr(from, 2);
    __ subl(count, 1<<(shift-1));
  __ BIND(L_skip_align2);
  }
  if (!VM_Version::supports_mmx()) {
    // No MMX: bulk-copy dwords with rep_mov.
    __ mov(rax, count);      // save 'count'
    __ shrl(count, shift);   // element count --> dword count
    __ addptr(to_from, from);// restore 'to'
    __ rep_mov();
    __ subptr(to_from, from);// restore 'to_from'
    __ mov(count, rax);      // restore 'count'
    __ jmpb(L_copy_2_bytes); // all dwords were copied
  } else {
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      __ testptr(from, 4);
      __ jccb(Assembler::zero, L_copy_64_bytes);
      __ movl(rax, Address(from, 0));
      __ movl(Address(from, to_from, Address::times_1, 0), rax);
      __ addptr(from, 4);
      __ subl(count, 1<<shift);
    }
  __ BIND(L_copy_64_bytes);
    __ mov(rax, count);
    __ shrl(rax, shift+1);  // 8 bytes chunk count
    //
    // Copy 8-byte chunks through MMX or XMM registers, 8 per iteration of the loop
    //
    if (UseXMMForArrayCopy) {
      xmm_copy_forward(from, to_from, rax);
    } else {
      mmx_copy_forward(from, to_from, rax);
    }
  }
  // copy trailing dword
__ BIND(L_copy_4_bytes);
  __ testl(count, 1<<shift);
  __ jccb(Assembler::zero, L_copy_2_bytes);
  __ movl(rax, Address(from, 0));
  __ movl(Address(from, to_from, Address::times_1, 0), rax);
  if (!is_sub_word) {
    // Elements are at least 4 bytes wide: no word/byte tail exists.
  __ BIND(L_copy_2_bytes);
  } else {
    __ addptr(from, 4);
  __ BIND(L_copy_2_bytes);
    // copy trailing word
    __ testl(count, 1<<(shift-1));
    __ jccb(Assembler::zero, L_copy_byte);
    __ movw(rax, Address(from, 0));
    __ movw(Address(from, to_from, Address::times_1, 0), rax);
    if (!is_byte) {
    __ BIND(L_copy_byte);
    } else {
      __ addptr(from, 2);
    __ BIND(L_copy_byte);
      // copy trailing byte
      __ testl(count, 1);
      __ jccb(Assembler::zero, L_exit);
      __ movb(rax, Address(from, 0));
      __ movb(Address(from, to_from, Address::times_1, 0), rax);
    __ BIND(L_exit);
    }
  }

  if (is_oop) {
    __ movl(count, Address(rsp, 12+12)); // reread 'count'
    __ mov(to, saved_to); // restore 'to'
    gen_write_ref_array_post_barrier(to, count);
  __ BIND(L_0_count);
  }
  inc_copy_counter_np(t);
  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1050 |
||
1051 |
||
6433 | 1052 |
address generate_fill(BasicType t, bool aligned, const char *name) {
  // Generates a stub that loads (to, value, count) from the stack and
  // delegates the actual fill to MacroAssembler::generate_fill, using rax
  // and xmm0 as the scratch registers passed to it. Returns the stub's
  // start address.
  const Register to    = rdi;  // destination array address
  const Register value = rdx;  // value
  const Register count = rsi;  // elements count

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  BLOCK_COMMENT("Entry:");

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);
  // Arguments sit past the 12 bytes saved above and the return address.
  __ movptr(to   , Address(rsp, 12+ 4));
  __ movl(value, Address(rsp, 12+ 8));
  __ movl(count, Address(rsp, 12+ 12));

  __ generate_fill(t, aligned, to, value, count, rax, xmm0);

  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);
  return start;
}
|
1078 |
||
1 | 1079 |
address generate_conjoint_copy(BasicType t, bool aligned,
                               Address::ScaleFactor sf,
                               address nooverlap_target,
                               address* entry, const char *name,
                               bool dest_uninitialized = false) {
  // Generates an array copy stub that tolerates overlapping ranges: when
  // the ranges do not overlap in the dangerous direction it jumps to
  // 'nooverlap_target', otherwise it copies from high addresses to low.
  // Returns the stub's start address; when 'entry' is non-NULL it receives
  // the pc just after the arguments have been loaded. The generated code
  // returns 0 in rax.

  // Register roles inside the stub. Note that 'src' and 'end' share rax.
  const Register src   = rax;  // source array address
  const Register dst   = rdx;  // destination array address
  const Register from  = rsi;  // source array address
  const Register to    = rdi;  // destination array address
  const Register count = rcx;  // elements count
  const Register end   = rax;  // array end address

  // Element-type predicates used to select what gets emitted.
  const bool is_oop      = (t == T_OBJECT);
  const bool is_byte     = (t == T_BYTE);
  const bool is_sub_word = (t == T_BYTE || t == T_SHORT);

  // log2(elements per pointer-sized word); (1 << shift) elements make 4 bytes.
  const int shift = Address::times_ptr - sf;

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
  Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop;

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);
  __ movptr(src  , Address(rsp, 12+ 4));   // from
  __ movptr(dst  , Address(rsp, 12+ 8));   // to
  __ movl2ptr(count, Address(rsp, 12+12)); // count

  if (entry != NULL) {
    *entry = __ pc(); // Entry point from generic arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  // nooverlap_target expects arguments in rsi and rdi.
  __ mov(from, src);
  __ mov(to  , dst);

  // arrays overlap test: dispatch to disjoint stub if necessary.
  RuntimeAddress nooverlap(nooverlap_target);
  __ cmpptr(dst, src);
  __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
  __ jump_cc(Assembler::belowEqual, nooverlap);   // dst <= src
  __ cmpptr(dst, end);
  __ jump_cc(Assembler::aboveEqual, nooverlap);   // dst >= end of source

  if (is_oop) {
    // Nothing to copy and no barriers needed for an empty range.
    __ testl(count, count);
    __ jcc(Assembler::zero, L_0_count);
    gen_write_ref_array_pre_barrier(dst, count, dest_uninitialized);
  }

  // copy from high to low
  __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
  __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
  if (is_sub_word) {
    // Align the end of destination array at 4 bytes address boundary
    __ lea(end, Address(dst, count, sf, 0));
    if (is_byte) {
      // One byte misalignment happens only for byte arrays
      __ testl(end, 1);
      __ jccb(Assembler::zero, L_skip_align1);
      __ decrement(count);
      __ movb(rdx, Address(from, count, sf, 0));
      __ movb(Address(to, count, sf, 0), rdx);
    __ BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    __ testl(end, 2);
    __ jccb(Assembler::zero, L_skip_align2);
    __ subptr(count, 1<<(shift-1));
    __ movw(rdx, Address(from, count, sf, 0));
    __ movw(Address(to, count, sf, 0), rdx);
  __ BIND(L_skip_align2);
    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
    __ jcc(Assembler::below, L_copy_4_bytes);
  }

  if (!VM_Version::supports_mmx()) {
    // No MMX: bulk-copy dwords backwards with rep_mov, bracketed by std/cld.
    __ std();
    __ mov(rax, count); // Save 'count'
    __ mov(rdx, to);    // Save 'to'
    __ lea(rsi, Address(from, count, sf, -4));   // last dword of the source
    __ lea(rdi, Address(to  , count, sf, -4));   // last dword of the destination
    __ shrptr(count, shift); // element count --> dword count
    __ rep_mov();
    __ cld();
    __ mov(count, rax); // restore 'count'
    __ andl(count, (1<<shift)-1);      // mask the number of rest elements
    __ movptr(from, Address(rsp, 12+4)); // reread 'from'
    __ mov(to, rdx);   // restore 'to'
    __ jmpb(L_copy_2_bytes); // all dword were copied
  } else {
    // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
    __ testptr(end, 4);
    __ jccb(Assembler::zero, L_copy_8_bytes);
    __ subl(count, 1<<shift);
    __ movl(rdx, Address(from, count, sf, 0));
    __ movl(Address(to, count, sf, 0), rdx);
    __ jmpb(L_copy_8_bytes);

    __ align(OptoLoopAlignment);
    // Move 8 bytes
  __ BIND(L_copy_8_bytes_loop);
    if (UseXMMForArrayCopy) {
      __ movq(xmm0, Address(from, count, sf, 0));
      __ movq(Address(to, count, sf, 0), xmm0);
    } else {
      __ movq(mmx0, Address(from, count, sf, 0));
      __ movq(Address(to, count, sf, 0), mmx0);
    }
  __ BIND(L_copy_8_bytes);
    __ subl(count, 2<<shift);
    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
    __ addl(count, 2<<shift);          // undo the last subtraction
    if (!UseXMMForArrayCopy) {
      __ emms();
    }
  }
__ BIND(L_copy_4_bytes);
  // copy the remaining dword (4 bytes) just below the part already copied
  __ testl(count, 1<<shift);
  __ jccb(Assembler::zero, L_copy_2_bytes);
  __ movl(rdx, Address(from, count, sf, -4));
  __ movl(Address(to, count, sf, -4), rdx);

  if (!is_sub_word) {
    // Elements are at least 4 bytes wide: no word/byte remainder exists.
  __ BIND(L_copy_2_bytes);
  } else {
    __ subl(count, (1<<shift));
  __ BIND(L_copy_2_bytes);
    // copy the remaining word (2 bytes)
    __ testl(count, 1<<(shift-1));
    __ jccb(Assembler::zero, L_copy_byte);
    __ movw(rdx, Address(from, count, sf, -2));
    __ movw(Address(to, count, sf, -2), rdx);
    if (!is_byte) {
    __ BIND(L_copy_byte);
    } else {
      __ subl(count, 1<<(shift-1));
    __ BIND(L_copy_byte);
      // copy the remaining first byte
      __ testl(count, 1);
      __ jccb(Assembler::zero, L_exit);
      __ movb(rdx, Address(from, 0));
      __ movb(Address(to, 0), rdx);
    __ BIND(L_exit);
    }
  }
  if (is_oop) {
    __ movl2ptr(count, Address(rsp, 12+12)); // reread count
    gen_write_ref_array_post_barrier(to, count);
  __ BIND(L_0_count);
  }
  inc_copy_counter_np(t);
  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1240 |
||
1241 |
||
1242 |
address generate_disjoint_long_copy(address* entry, const char *name) {
  // Generates a forward copy stub for 8-byte elements and returns its start
  // address. When 'entry' is non-NULL it receives the pc just after the
  // arguments have been loaded into registers; a NULL 'entry' is accepted,
  // matching generate_disjoint_copy. The generated code returns 0 in rax.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_copy_8_bytes, L_copy_8_bytes_loop;
  const Register from       = rax;  // source array address
  const Register to         = rdx;  // destination array address
  const Register count      = rcx;  // elements count
  const Register to_from    = rdx;  // (to - from)

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ movptr(from , Address(rsp, 8+0));       // from
  __ movptr(to   , Address(rsp, 8+4));       // to
  __ movl2ptr(count, Address(rsp, 8+8));     // count

  // Guard the out-parameter instead of dereferencing it unconditionally.
  if (entry != NULL) {
    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  __ subptr(to, from); // to --> to_from
  if (VM_Version::supports_mmx()) {
    if (UseXMMForArrayCopy) {
      xmm_copy_forward(from, to_from, count);
    } else {
      mmx_copy_forward(from, to_from, count);
    }
  } else {
    // No MMX: move each 8-byte element with an fild_d / fistp_d pair.
    // Enter at the loop test so that a zero count copies nothing.
    __ jmpb(L_copy_8_bytes);
    __ align(OptoLoopAlignment);
  __ BIND(L_copy_8_bytes_loop);
    __ fild_d(Address(from, 0));
    __ fistp_d(Address(from, to_from, Address::times_1));
    __ addptr(from, 8);
  __ BIND(L_copy_8_bytes);
    __ decrement(count);
    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
  }
  inc_copy_counter_np(T_LONG);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1285 |
||
1286 |
address generate_conjoint_long_copy(address nooverlap_target,
                                    address* entry, const char *name) {
  // Generates a copy stub for 8-byte elements that tolerates overlapping
  // ranges: it jumps to 'nooverlap_target' when a forward copy is safe and
  // otherwise copies element by element from the highest index down.
  // Returns the stub's start address. When 'entry' is non-NULL it receives
  // the pc just after the arguments have been loaded; a NULL 'entry' is
  // accepted, matching generate_conjoint_copy. The generated code returns 0
  // in rax.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_copy_8_bytes, L_copy_8_bytes_loop;
  const Register from       = rax;  // source array address
  const Register to         = rdx;  // destination array address
  const Register count      = rcx;  // elements count
  const Register end_from   = rax;  // source array end address

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ movptr(from , Address(rsp, 8+0));       // from
  __ movptr(to   , Address(rsp, 8+4));       // to
  __ movl2ptr(count, Address(rsp, 8+8));     // count

  // Guard the out-parameter instead of dereferencing it unconditionally.
  if (entry != NULL) {
    *entry = __ pc(); // Entry point from generic arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  // arrays overlap test
  __ cmpptr(to, from);
  RuntimeAddress nooverlap(nooverlap_target);
  __ jump_cc(Assembler::belowEqual, nooverlap);
  __ lea(end_from, Address(from, count, Address::times_8, 0));
  __ cmpptr(to, end_from);
  // 'end_from' shares rax with 'from', so 'from' is reloaded from the stack
  // here, between the compare and the jump that consumes its flags.
  __ movptr(from, Address(rsp, 8));  // from
  __ jump_cc(Assembler::aboveEqual, nooverlap);

  // Enter at the loop test: count is decremented before the first copy, so
  // elements count-1 down to 0 are moved.
  __ jmpb(L_copy_8_bytes);

  __ align(OptoLoopAlignment);
__ BIND(L_copy_8_bytes_loop);
  if (VM_Version::supports_mmx()) {
    if (UseXMMForArrayCopy) {
      __ movq(xmm0, Address(from, count, Address::times_8));
      __ movq(Address(to, count, Address::times_8), xmm0);
    } else {
      __ movq(mmx0, Address(from, count, Address::times_8));
      __ movq(Address(to, count, Address::times_8), mmx0);
    }
  } else {
    __ fild_d(Address(from, count, Address::times_8));
    __ fistp_d(Address(to, count, Address::times_8));
  }
__ BIND(L_copy_8_bytes);
  __ decrement(count);
  __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);

  // emms is emitted only when the MMX register path was used above.
  if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
    __ emms();
  }
  inc_copy_counter_np(T_LONG);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1344 |
||
1345 |
||
1346 |
// Helper for generating a dynamic type check. |
|
1347 |
// The sub_klass must be one of {rbx, rdx, rsi}. |
|
1348 |
// The temp is killed. |
|
1349 |
void generate_type_check(Register sub_klass, |
|
1350 |
Address& super_check_offset_addr, |
|
1351 |
Address& super_klass_addr, |
|
1352 |
Register temp, |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1353 |
Label* L_success, Label* L_failure) { |
1 | 1354 |
BLOCK_COMMENT("type_check:"); |
1355 |
||
1356 |
Label L_fallthrough; |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1357 |
#define LOCAL_JCC(assembler_con, label_ptr) \ |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1358 |
if (label_ptr != NULL) __ jcc(assembler_con, *(label_ptr)); \ |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1359 |
else __ jcc(assembler_con, L_fallthrough) /*omit semi*/ |
1 | 1360 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1361 |
// The following is a strange variation of the fast path which requires |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1362 |
// one less register, because needed values are on the argument stack. |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1363 |
// __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp, |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1364 |
// L_success, L_failure, NULL); |
1 | 1365 |
assert_different_registers(sub_klass, temp); |
1366 |
||
11430
718fc06da49a
7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
stefank
parents:
10565
diff
changeset
|
1367 |
int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); |
1 | 1368 |
|
1369 |
// if the pointers are equal, we are done (e.g., String[] elements) |
|
1066 | 1370 |
__ cmpptr(sub_klass, super_klass_addr); |
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1371 |
LOCAL_JCC(Assembler::equal, L_success); |
1 | 1372 |
|
1373 |
// check the supertype display: |
|
1066 | 1374 |
__ movl2ptr(temp, super_check_offset_addr); |
1 | 1375 |
Address super_check_addr(sub_klass, temp, Address::times_1, 0); |
1066 | 1376 |
__ movptr(temp, super_check_addr); // load displayed supertype |
1377 |
__ cmpptr(temp, super_klass_addr); // test the super type |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1378 |
LOCAL_JCC(Assembler::equal, L_success); |
1 | 1379 |
|
1380 |
// if it was a primary super, we can just fail immediately |
|
1381 |
__ cmpl(super_check_offset_addr, sc_offset); |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1382 |
LOCAL_JCC(Assembler::notEqual, L_failure); |
1 | 1383 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1384 |
// The repne_scan instruction uses fixed registers, which will get spilled. |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1385 |
// We happen to know this works best when super_klass is in rax. |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1386 |
Register super_klass = temp; |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1387 |
__ movptr(super_klass, super_klass_addr); |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1388 |
__ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1389 |
L_success, L_failure); |
1 | 1390 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1391 |
__ bind(L_fallthrough); |
1 | 1392 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1393 |
if (L_success == NULL) { BLOCK_COMMENT("L_success:"); } |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1394 |
if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); } |
1 | 1395 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1396 |
#undef LOCAL_JCC |
1 | 1397 |
} |
1398 |
||
1399 |
//
// Generate checkcasting array copy stub
//
// Input:
//    4(rsp)   - source array address
//    8(rsp)   - destination array address
//   12(rsp)   - element count, can be zero
//   16(rsp)   - size_t ckoff (super_check_offset)
//   20(rsp)   - oop ckval (super_klass)
//
// Output:
//   rax, ==  0   -  success
//   rax, == -1^K -  failure, where K is partial transfer count (i.e. ~K)
//
|
8498 | 1413 |
address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) {
  // Emits the oop-array copy stub that type-checks every non-null element
  // against the super klass passed on the stack before storing it.
  // Returns the stub's start address; when entry is non-NULL it additionally
  // receives the address just past the argument loads.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_load_element, L_store_element, L_do_card_marks, L_done;

  // Register plan:
  //   rax, rdx, rcx -- loop control (end_from, end_to, count)
  //   rdi, rsi      -- element access (oop, klass)
  //   rbx           -- temp handed to the type check
  const Register from      = rax;   // source array address
  const Register to        = rdx;   // destination array address
  const Register length    = rcx;   // elements count
  const Register cur_oop   = rdi;   // the oop currently being copied
  const Register cur_klass = rsi;   // klass of cur_oop (the sub_klass)
  const Register scratch   = rbx;   // lone remaining temp

  __ enter();   // required for proper stackwalking of RuntimeStub frame

  // Saved here, restored at L_done.
  __ push(rsi);
  __ push(rdi);
  __ push(rbx);

  // Incoming arguments, addressed from rsp after enter() and the three pushes.
  Address src_arg  (rsp, 16+ 4);    // from
  Address dst_arg  (rsp, 16+ 8);    // to
  Address len_arg  (rsp, 16+12);    // elements count
  Address ckoff_arg(rsp, 16+16);    // super_check_offset
  Address ckval_arg(rsp, 16+20);    // super_klass

  // Load up:
  __ movptr(from, src_arg);
  __ movptr(to, dst_arg);
  __ movl2ptr(length, len_arg);

  if (entry != NULL) {
    *entry = __ pc();   // Entry point from generic arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  //---------------------------------------------------------------
  // This stub serves arraycopy calls where both arrays are subtypes of
  // Object[] but the destination array type is neither equal to nor a
  // supertype of the source type, so each element must be checked
  // separately.

  // Loop-invariant addresses; both are exclusive end pointers.
  Address end_from_addr(from, length, Address::times_ptr, 0);
  Address end_to_addr  (to,   length, Address::times_ptr, 0);

  const Register end_from = from;     // re-use
  const Register end_to   = to;       // re-use
  const Register count    = length;   // re-use

  // Loop-variant addresses; they assume the post-incremented count is < 0.
  Address from_element_addr(end_from, count, Address::times_ptr, 0);
  Address to_element_addr  (end_to,   count, Address::times_ptr, 0);
  Address cur_klass_addr(cur_oop, oopDesc::klass_offset_in_bytes());

  // Copy from low to high addresses, indexed from the end of each array.
  gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
  __ lea(end_from, end_from_addr);
  __ lea(end_to, end_to_addr);
  assert(length == count, "");   // else fix next line:
  __ negptr(count);              // negate and test the length
  __ jccb(Assembler::notZero, L_load_element);

  // Zero elements: report (trivial) success right away.
  __ xorptr(rax, rax);
  __ jmp(L_done);

  // ======== begin loop ========
  // The loop is rotated: it is entered at L_load_element.
  // Loop control:
  //   for (count = -count; count != 0; count++)
  // end_from/end_to point just past the arrays (see the exclusive end
  // pointers above), so the negative count indexes elements from the end.
  __ align(OptoLoopAlignment);

  __ BIND(L_store_element);
  __ movptr(to_element_addr, cur_oop);    // store the oop
  __ increment(count);                    // step the count toward zero
  __ jccb(Assembler::zero, L_do_card_marks);

  // ======== loop entry is here ========
  __ BIND(L_load_element);
  __ movptr(cur_oop, from_element_addr);  // load the oop
  __ testptr(cur_oop, cur_oop);
  __ jccb(Assembler::zero, L_store_element);   // null needs no type check

  // (Possible trick: remember the last successfully stored non-null element
  // and make a quick oop equality check against it.)

  __ movptr(cur_klass, cur_klass_addr);   // query the object klass
  generate_type_check(cur_klass, ckoff_arg, ckval_arg, scratch,
                      &L_store_element, NULL);
  // (Falling through means the element failed the type check.)
  // ======== end loop ========

  // A genuine failure; the caller has to finish the job.
  // Here count == -1 * number of *remaining* oops and len_arg holds the
  // *total*, so len_arg + count is the number of oops already copied.
  // Emit GC store barriers for those and report their number to the caller.
  assert_different_registers(to, count, rax);
  Label L_post_barrier;
  __ addl(count, len_arg);    // transfers = (length - remaining)
  __ movl2ptr(rax, count);    // save the value
  __ notptr(rax);             // report (-1^K) to caller (does not affect flags)
  __ jccb(Assembler::notZero, L_post_barrier);
  __ jmp(L_done);             // K == 0, nothing was copied, skip post barrier

  // Reached on full success only.
  __ BIND(L_do_card_marks);
  __ xorptr(rax, rax);        // return 0 on success
  __ movl2ptr(count, len_arg);

  __ BIND(L_post_barrier);
  __ movptr(to, dst_arg);     // reload
  gen_write_ref_array_post_barrier(to, count);

  // Shared exit for success and failure.
  __ BIND(L_done);
  __ pop(rbx);
  __ pop(rdi);
  __ pop(rsi);
  inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
  __ leave();   // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
1544 |
||
1545 |
//
// Generate 'unsafe' array copy stub
// Though just as safe as the other stubs, it takes an unscaled
// size_t argument instead of an element count.
//
// Input:
//    4(rsp)   - source array address
//    8(rsp)   - destination array address
//   12(rsp)   - byte count, can be zero
//
// Output:
//   rax, ==  0  -  success
//   rax, == -1  -  need to call System.arraycopy
//
// Examines the alignment of the operands and dispatches
// to a long, int, short, or byte copy loop.
//
|
1562 |
address generate_unsafe_copy(const char *name,
                             address byte_copy_entry,
                             address short_copy_entry,
                             address int_copy_entry,
                             address long_copy_entry) {
  // Emits a dispatcher that ORs source address, destination address and byte
  // count together, then jumps to the widest element copy entry whose
  // alignment all three satisfy.  Returns the stub's start address.

  Label L_long_aligned, L_int_aligned, L_short_aligned;

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  const Register src  = rax;   // source array address
  const Register dst  = rdx;   // destination array address
  const Register size = rcx;   // byte count on entry, element count at dispatch

  __ enter();   // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);

  // Incoming arguments, addressed from rsp after enter() and the two pushes.
  Address src_arg (rsp, 12+ 4);    // from
  Address dst_arg (rsp, 12+ 8);    // to
  Address size_arg(rsp, 12+12);    // byte count

  // Load up:
  __ movptr(src, src_arg);
  __ movptr(dst, dst_arg);
  __ movl2ptr(size, size_arg);

  // bump this on entry, not on exit:
  inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);

  // One combined word whose low bits carry the misalignment of all operands.
  const Register align_bits = rsi;
  __ mov(align_bits, src);
  __ orptr(align_bits, dst);
  __ orptr(align_bits, size);

  __ testl(align_bits, BytesPerLong-1);
  __ jccb(Assembler::zero, L_long_aligned);

  __ testl(align_bits, BytesPerInt-1);
  __ jccb(Assembler::zero, L_int_aligned);

  __ testl(align_bits, BytesPerShort-1);
  __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));

  __ BIND(L_short_aligned);
  __ shrptr(size, LogBytesPerShort);   // size => short_count
  __ movl(size_arg, size);             // update the stacked count argument
  __ jump(RuntimeAddress(short_copy_entry));

  __ BIND(L_int_aligned);
  __ shrptr(size, LogBytesPerInt);     // size => int_count
  __ movl(size_arg, size);             // update the stacked count argument
  __ jump(RuntimeAddress(int_copy_entry));

  __ BIND(L_long_aligned);
  __ shrptr(size, LogBytesPerLong);    // size => qword_count
  __ movl(size_arg, size);             // update the stacked count argument
  __ pop(rdi);   // Do pops here since jlong_arraycopy stub does not do it.
  __ pop(rsi);
  __ jump(RuntimeAddress(long_copy_entry));

  return start;
}
|
1626 |
||
1627 |
||
1628 |
// Perform range checks on the proposed arraycopy.
// Smashes src_pos and dst_pos.  (Uses them up for temps.)
|
1630 |
void arraycopy_range_checks(Register src,
                            Register src_pos,
                            Register dst,
                            Register dst_pos,
                            Address& length,
                            Label& L_failed) {
  // Emits the two bounds checks for an arraycopy: each start position plus
  // the transfer length is compared against the array's length field, and
  // control goes to L_failed when it is above it.  src_pos and dst_pos are
  // overwritten with the respective end positions.
  BLOCK_COMMENT("arraycopy_range_checks:");

  const int length_offset = arrayOopDesc::length_offset_in_bytes();

  // Turn both start positions into end positions in place.
  __ addl(src_pos, length);   // src_pos + length
  __ addl(dst_pos, length);   // dst_pos + length

  // if (src_pos + length > arrayOop(src)->length() ) FAIL;
  __ cmpl(src_pos, Address(src, length_offset));
  __ jcc(Assembler::above, L_failed);

  // if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
  __ cmpl(dst_pos, Address(dst, length_offset));
  __ jcc(Assembler::above, L_failed);

  BLOCK_COMMENT("arraycopy_range_checks done");
}
|
1652 |
||
1653 |
||
1654 |
//
// Generate generic array copy stubs
//
// Input:
//    4(rsp)   - src oop
//    8(rsp)   - src_pos
//   12(rsp)   - dst oop
//   16(rsp)   - dst_pos
//   20(rsp)   - element count
//
// Output:
//   rax, ==  0   -  success
//   rax, == -1^K -  failure, where K is partial transfer count (i.e. ~K)
//
|
1668 |
address generate_generic_copy(const char *name,
                              address entry_jbyte_arraycopy,
                              address entry_jshort_arraycopy,
                              address entry_jint_arraycopy,
                              address entry_oop_arraycopy,
                              address entry_jlong_arraycopy,
                              address entry_checkcast_arraycopy) {
  // Emits the generic arraycopy stub: it validates the five stack arguments,
  // then tail-jumps to the element-type specific entry passed in, or returns
  // -1 in rax when a check fails.  Returns the stub's start address.
  Label L_failed, L_failed_0, L_objArray;

  // Pad with nops so that the 5-byte jmp emitted at L_failed_0 ends exactly
  // on a CodeEntryAlignment boundary, where the stub proper begins.
  {
    const int align = CodeEntryAlignment;
    int pad = (align - 5) - (__ offset() % align);   // 5 = sizeof jmp(L_failed)
    if (pad < 0)  pad += align;
    if (pad > 0)  __ nop(pad);
  }
  StubCodeMark mark(this, "StubRoutines", name);

  // Short-hop target to L_failed.  Makes for denser prologue code.
  __ BIND(L_failed_0);
  __ jmp(L_failed);
  assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");

  __ align(CodeEntryAlignment);
  address start = __ pc();

  __ enter();   // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);

  // bump this on entry, not on exit:
  inc_counter_np(SharedRuntime::_generic_array_copy_ctr);

  // Incoming arguments, addressed from rsp after enter() and the two pushes.
  Address SRC     (rsp, 12+ 4);
  Address SRC_POS (rsp, 12+ 8);
  Address DST     (rsp, 12+12);
  Address DST_POS (rsp, 12+16);
  Address LENGTH  (rsp, 12+20);

  //-----------------------------------------------------------------------
  // The assembler stub handles an arraycopy call only when all of the
  // following hold:
  //
  // (1) src and dst must not be null.
  // (2) src_pos must not be negative.
  // (3) dst_pos must not be negative.
  // (4) length  must not be negative.
  // (5) src klass and dst klass should be the same and not NULL.
  // (6) src and dst should be arrays.
  // (7) src_pos + length must not exceed length of src.
  // (8) dst_pos + length must not exceed length of dst.
  //

  const Register src     = rax;   // source array oop
  const Register src_pos = rsi;
  const Register dst     = rdx;   // destination array oop
  const Register dst_pos = rdi;
  const Register length  = rcx;   // transfer count

  // if (src == NULL) return -1;
  __ movptr(src, SRC);
  __ testptr(src, src);
  __ jccb(Assembler::zero, L_failed_0);

  // if (src_pos < 0) return -1;
  __ movl2ptr(src_pos, SRC_POS);
  __ testl(src_pos, src_pos);
  __ jccb(Assembler::negative, L_failed_0);

  // if (dst == NULL) return -1;
  __ movptr(dst, DST);
  __ testptr(dst, dst);
  __ jccb(Assembler::zero, L_failed_0);

  // if (dst_pos < 0) return -1;
  __ movl2ptr(dst_pos, DST_POS);
  __ testl(dst_pos, dst_pos);
  __ jccb(Assembler::negative, L_failed_0);

  // if (length < 0) return -1;
  __ movl2ptr(length, LENGTH);
  __ testl(length, length);
  __ jccb(Assembler::negative, L_failed_0);

  // Fetch src->klass(); from here on rcx holds the klass, not the length.
  Address src_klass_addr(src, oopDesc::klass_offset_in_bytes());
  Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes());
  const Register rcx_src_klass = rcx;   // array klass
  __ movptr(rcx_src_klass, src_klass_addr);

#ifdef ASSERT
  //  assert(src->klass() != NULL);
  BLOCK_COMMENT("assert klasses not null");
  { Label L_broken, L_src_ok;
    __ testptr(rcx_src_klass, rcx_src_klass);
    __ jccb(Assembler::notZero, L_src_ok);   // it is broken if klass is NULL
    __ bind(L_broken);
    __ stop("broken null klass");
    __ bind(L_src_ok);
    __ cmpptr(dst_klass_addr, (int32_t)NULL_WORD);
    __ jccb(Assembler::equal, L_broken);     // this would be broken also
    BLOCK_COMMENT("assert done");
  }
#endif //ASSERT

  // Load layout helper (32-bits)
  //
  //  |array_tag|     | header_size | element_type |     |log2_element_size|
  // 32        30    24            16              8     2                 0
  //
  //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
  //

  int lh_offset = in_bytes(Klass::layout_helper_offset());
  Address src_klass_lh_addr(rcx_src_klass, lh_offset);

  // Handle objArrays completely differently...
  jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
  __ cmpl(src_klass_lh_addr, objArray_lh);
  __ jcc(Assembler::equal, L_objArray);

  //  if (src->klass() != dst->klass()) return -1;
  __ cmpptr(rcx_src_klass, dst_klass_addr);
  __ jccb(Assembler::notEqual, L_failed_0);

  // rcx is recycled once more: the klass is replaced by its layout helper.
  const Register rcx_lh = rcx;   // layout helper
  assert(rcx_lh == rcx_src_klass, "known alias");
  __ movl(rcx_lh, src_klass_lh_addr);

  //  if (!src->is_Array()) return -1;
  __ cmpl(rcx_lh, Klass::_lh_neutral_value);
  __ jcc(Assembler::greaterEqual, L_failed_0);   // signed cmp

  // At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
  { Label L_is_type_array;
    __ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
    __ jcc(Assembler::greaterEqual, L_is_type_array);   // signed cmp
    __ stop("must be a primitive array");
    __ bind(L_is_type_array);
  }
#endif

  assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh);
  arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);

  // TypeArrayKlass
  //
  // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
  // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
  //
  const Register rsi_offset = rsi;   // array offset
  const Register src_array  = src;   // src array offset
  const Register dst_array  = dst;   // dst array offset
  const Register rdi_elsize = rdi;   // log2 element size

  // Extract the header size from the layout helper and skip both headers.
  __ mov(rsi_offset, rcx_lh);
  __ shrptr(rsi_offset, Klass::_lh_header_size_shift);
  __ andptr(rsi_offset, Klass::_lh_header_size_mask);     // array_offset
  __ addptr(src_array, rsi_offset);                       // src array offset
  __ addptr(dst_array, rsi_offset);                       // dst array offset
  __ andptr(rcx_lh, Klass::_lh_log2_element_size_mask);   // log2 elsize

  // These registers have to be set up before jumping to the chosen stub ...
  const Register from  = src;   // source array address
  const Register to    = dst;   // destination array address
  const Register count = rcx;   // elements count
  // ... and some of them are duplicated into the stack argument slots.
#define FROM   Address(rsp, 12+ 4)
#define TO     Address(rsp, 12+ 8)   // Not used now
#define COUNT  Address(rsp, 12+12)   // Only for oop arraycopy

  BLOCK_COMMENT("scale indexes to element size");
  __ movl2ptr(rsi, SRC_POS);    // src_pos
  __ shlptr(rsi);               // src_pos << rcx (log2 elsize)
  assert(src_array == from, "");
  __ addptr(from, rsi);         // from = src_array + SRC_POS << log2 elsize
  __ movl2ptr(rdi, DST_POS);    // dst_pos
  __ shlptr(rdi);               // dst_pos << rcx (log2 elsize)
  assert(dst_array == to, "");
  __ addptr(to, rdi);           // to   = dst_array + DST_POS << log2 elsize
  __ movptr(FROM, from);        // src_addr
  __ mov(rdi_elsize, rcx_lh);   // log2 elsize
  __ movl2ptr(count, LENGTH);   // elements count

  BLOCK_COMMENT("choose copy loop based on element size");
  __ cmpl(rdi_elsize, 0);

  __ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy));
  __ cmpl(rdi_elsize, LogBytesPerShort);
  __ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy));
  __ cmpl(rdi_elsize, LogBytesPerInt);
  __ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy));
#ifdef ASSERT
  __ cmpl(rdi_elsize, LogBytesPerLong);
  __ jccb(Assembler::notEqual, L_failed);
#endif
  __ pop(rdi);   // Do pops here since jlong_arraycopy stub does not do it.
  __ pop(rsi);
  __ jump(RuntimeAddress(entry_jlong_arraycopy));

  // Common failure exit: restore the saved registers and return -1.
  __ BIND(L_failed);
  __ xorptr(rax, rax);
  __ notptr(rax);   // return -1
  __ pop(rdi);
  __ pop(rsi);
  __ leave();       // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  // ObjArrayKlass
  __ BIND(L_objArray);
  // live at this point:  rcx_src_klass, src[_pos], dst[_pos]

  Label L_plain_copy, L_checkcast_copy;
  //  test array classes for subtyping
  __ cmpptr(rcx_src_klass, dst_klass_addr);   // usual case is exact equality
  __ jccb(Assembler::notEqual, L_checkcast_copy);

  // Identically typed arrays can be copied without element-wise checks.
  assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass);
  arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);

  __ BIND(L_plain_copy);
  __ movl2ptr(count, LENGTH);      // elements count
  __ movl2ptr(src_pos, SRC_POS);   // reload src_pos
  __ lea(from, Address(src, src_pos, Address::times_ptr,
                       arrayOopDesc::base_offset_in_bytes(T_OBJECT)));   // src_addr
  __ movl2ptr(dst_pos, DST_POS);   // reload dst_pos
  __ lea(to, Address(dst, dst_pos, Address::times_ptr,
                     arrayOopDesc::base_offset_in_bytes(T_OBJECT)));     // dst_addr
  __ movptr(FROM, from);           // src_addr
  __ movptr(TO, to);               // dst_addr
  __ movl(COUNT, count);           // count
  __ jump(RuntimeAddress(entry_oop_arraycopy));

  __ BIND(L_checkcast_copy);
  // live at this point:  rcx_src_klass, dst[_pos], src[_pos]
  {
    // Handy offsets:
    int ek_offset  = in_bytes(ObjArrayKlass::element_klass_offset());
    int sco_offset = in_bytes(Klass::super_check_offset_offset());

    Register rsi_dst_klass = rsi;
    Register rdi_temp      = rdi;
    assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos");
    assert(rdi_temp      == dst_pos, "expected alias w/ dst_pos");
    Address dst_klass_lh_addr(rsi_dst_klass, lh_offset);

    // Before looking at dst.length, make sure dst is also an objArray.
    __ movptr(rsi_dst_klass, dst_klass_addr);
    __ cmpl(dst_klass_lh_addr, objArray_lh);
    __ jccb(Assembler::notEqual, L_failed);

    // It is safe to examine both src.length and dst.length.
    __ movl2ptr(src_pos, SRC_POS);   // reload rsi
    arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
    // (Now src_pos and dst_pos are killed, but not src and dst.)

    // One more temp is needed; it is popped again after the type check
    // unless control continues into the checkcast stub.
    __ push(rbx);
    Register rbx_src_klass = rbx;

    __ mov(rbx_src_klass, rcx_src_klass);   // spill away from rcx
    __ movptr(rsi_dst_klass, dst_klass_addr);
    Address super_check_offset_addr(rsi_dst_klass, sco_offset);
    Label L_fail_array_check;
    generate_type_check(rbx_src_klass,
                        super_check_offset_addr, dst_klass_addr,
                        rdi_temp, NULL, &L_fail_array_check);
    // (On fall-through, we have passed the array type check.)
    __ pop(rbx);
    __ jmp(L_plain_copy);

    __ BIND(L_fail_array_check);
    // Reshuffle arguments so we can call checkcast_arraycopy:

    // match initial saves for checkcast_arraycopy
    // push(rsi);    // already done; see above
    // push(rdi);    // already done; see above
    // push(rbx);    // already done; see above

    // Marshal outgoing arguments now, freeing registers.
    Address from_arg  (rsp, 16+ 4);   // from
    Address to_arg    (rsp, 16+ 8);   // to
    Address length_arg(rsp, 16+12);   // elements count
    Address ckoff_arg (rsp, 16+16);   // super_check_offset
    Address ckval_arg (rsp, 16+20);   // super_klass

    // The incoming arguments again, at the offsets that apply once rbx has
    // been pushed as well.
    Address SRC_POS_arg(rsp, 16+ 8);
    Address DST_POS_arg(rsp, 16+16);
    Address LENGTH_arg (rsp, 16+20);
    // push rbx, changed the incoming offsets (why not just use rbp,??)
    // assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, "");

    __ movptr(rbx, Address(rsi_dst_klass, ek_offset));
    __ movl2ptr(length, LENGTH_arg);     // reload elements count
    __ movl2ptr(src_pos, SRC_POS_arg);   // reload src_pos
    __ movl2ptr(dst_pos, DST_POS_arg);   // reload dst_pos

    __ movptr(ckval_arg, rbx);           // destination element type
    __ movl(rbx, Address(rbx, sco_offset));
    __ movl(ckoff_arg, rbx);             // corresponding class check offset

    __ movl(length_arg, length);         // outgoing length argument

    __ lea(from, Address(src, src_pos, Address::times_ptr,
                         arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
    __ movptr(from_arg, from);

    __ lea(to, Address(dst, dst_pos, Address::times_ptr,
                       arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
    __ movptr(to_arg, to);
    __ jump(RuntimeAddress(entry_checkcast_arraycopy));
  }

  return start;
}
|
1985 |
||
1986 |
void generate_arraycopy_stubs() { |
|
1987 |
address entry; |
|
1988 |
address entry_jbyte_arraycopy; |
|
1989 |
address entry_jshort_arraycopy; |
|
1990 |
address entry_jint_arraycopy; |
|
1991 |
address entry_oop_arraycopy; |
|
1992 |
address entry_jlong_arraycopy; |
|
1993 |
address entry_checkcast_arraycopy; |
|
1994 |
||
1995 |
StubRoutines::_arrayof_jbyte_disjoint_arraycopy = |
|
1996 |
generate_disjoint_copy(T_BYTE, true, Address::times_1, &entry, |
|
1997 |
"arrayof_jbyte_disjoint_arraycopy"); |
|
1998 |
StubRoutines::_arrayof_jbyte_arraycopy = |
|
1999 |
generate_conjoint_copy(T_BYTE, true, Address::times_1, entry, |
|
2000 |
NULL, "arrayof_jbyte_arraycopy"); |
|
2001 |
StubRoutines::_jbyte_disjoint_arraycopy = |
|
2002 |
generate_disjoint_copy(T_BYTE, false, Address::times_1, &entry, |
|
2003 |
"jbyte_disjoint_arraycopy"); |
|
2004 |
StubRoutines::_jbyte_arraycopy = |
|
2005 |
generate_conjoint_copy(T_BYTE, false, Address::times_1, entry, |
|
2006 |
&entry_jbyte_arraycopy, "jbyte_arraycopy"); |
|
2007 |
||
2008 |
StubRoutines::_arrayof_jshort_disjoint_arraycopy = |
|
2009 |
generate_disjoint_copy(T_SHORT, true, Address::times_2, &entry, |
|
2010 |
"arrayof_jshort_disjoint_arraycopy"); |
|
2011 |
StubRoutines::_arrayof_jshort_arraycopy = |
|
2012 |
generate_conjoint_copy(T_SHORT, true, Address::times_2, entry, |
|
2013 |
NULL, "arrayof_jshort_arraycopy"); |
|
2014 |
StubRoutines::_jshort_disjoint_arraycopy = |
|
2015 |
generate_disjoint_copy(T_SHORT, false, Address::times_2, &entry, |
|
2016 |
"jshort_disjoint_arraycopy"); |
|
2017 |
StubRoutines::_jshort_arraycopy = |
|
2018 |
generate_conjoint_copy(T_SHORT, false, Address::times_2, entry, |
|
2019 |
&entry_jshort_arraycopy, "jshort_arraycopy"); |
|
2020 |
||
2021 |
// Next arrays are always aligned on 4 bytes at least. |
|
2022 |
StubRoutines::_jint_disjoint_arraycopy = |
|
2023 |
generate_disjoint_copy(T_INT, true, Address::times_4, &entry, |
|
2024 |
"jint_disjoint_arraycopy"); |
|
2025 |
StubRoutines::_jint_arraycopy = |
|
2026 |
generate_conjoint_copy(T_INT, true, Address::times_4, entry, |
|
2027 |
&entry_jint_arraycopy, "jint_arraycopy"); |
|
2028 |
||
2029 |
StubRoutines::_oop_disjoint_arraycopy = |
|
1066 | 2030 |
generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry, |
1 | 2031 |
"oop_disjoint_arraycopy"); |
2032 |
StubRoutines::_oop_arraycopy = |
|
1066 | 2033 |
generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry, |
1 | 2034 |
&entry_oop_arraycopy, "oop_arraycopy"); |
2035 |
||
8498 | 2036 |
StubRoutines::_oop_disjoint_arraycopy_uninit = |
2037 |
generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry, |
|
2038 |
"oop_disjoint_arraycopy_uninit", |
|
2039 |
/*dest_uninitialized*/true); |
|
2040 |
StubRoutines::_oop_arraycopy_uninit = |
|
2041 |
generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry, |
|
2042 |
NULL, "oop_arraycopy_uninit", |
|
2043 |
/*dest_uninitialized*/true); |
|
2044 |
||
1 | 2045 |
StubRoutines::_jlong_disjoint_arraycopy = |
2046 |
generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy"); |
|
2047 |
StubRoutines::_jlong_arraycopy = |
|
2048 |
generate_conjoint_long_copy(entry, &entry_jlong_arraycopy, |
|
2049 |
"jlong_arraycopy"); |
|
2050 |
||
6433 | 2051 |
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); |
2052 |
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); |
|
2053 |
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); |
|
2054 |
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); |
|
2055 |
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); |
|
2056 |
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); |
|
2057 |
||
8498 | 2058 |
StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; |
2059 |
StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; |
|
2060 |
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; |
|
2061 |
StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; |
|
1 | 2062 |
|
8498 | 2063 |
StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; |
2064 |
StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; |
|
2065 |
StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; |
|
2066 |
StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; |
|
1 | 2067 |
|
2068 |
StubRoutines::_checkcast_arraycopy = |
|
8498 | 2069 |
generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); |
2070 |
StubRoutines::_checkcast_arraycopy_uninit = |
|
2071 |
generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true); |
|
1 | 2072 |
|
2073 |
StubRoutines::_unsafe_arraycopy = |
|
2074 |
generate_unsafe_copy("unsafe_arraycopy", |
|
2075 |
entry_jbyte_arraycopy, |
|
2076 |
entry_jshort_arraycopy, |
|
2077 |
entry_jint_arraycopy, |
|
2078 |
entry_jlong_arraycopy); |
|
2079 |
||
2080 |
StubRoutines::_generic_arraycopy = |
|
2081 |
generate_generic_copy("generic_arraycopy", |
|
2082 |
entry_jbyte_arraycopy, |
|
2083 |
entry_jshort_arraycopy, |
|
2084 |
entry_jint_arraycopy, |
|
2085 |
entry_oop_arraycopy, |
|
2086 |
entry_jlong_arraycopy, |
|
2087 |
entry_checkcast_arraycopy); |
|
2088 |
} |
|
2089 |
||
4645
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2090 |
void generate_math_stubs() { |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2091 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2092 |
StubCodeMark mark(this, "StubRoutines", "log"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2093 |
StubRoutines::_intrinsic_log = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2094 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2095 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2096 |
__ flog(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2097 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2098 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2099 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2100 |
StubCodeMark mark(this, "StubRoutines", "log10"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2101 |
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2102 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2103 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2104 |
__ flog10(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2105 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2106 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2107 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2108 |
StubCodeMark mark(this, "StubRoutines", "sin"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2109 |
StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2110 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2111 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2112 |
__ trigfunc('s'); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2113 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2114 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2115 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2116 |
StubCodeMark mark(this, "StubRoutines", "cos"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2117 |
StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2118 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2119 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2120 |
__ trigfunc('c'); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2121 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2122 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2123 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2124 |
StubCodeMark mark(this, "StubRoutines", "tan"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2125 |
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2126 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2127 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2128 |
__ trigfunc('t'); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2129 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2130 |
} |
12739
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2131 |
{ |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2132 |
StubCodeMark mark(this, "StubRoutines", "exp"); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2133 |
StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc(); |
4645
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2134 |
|
12739
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2135 |
__ fld_d(Address(rsp, 4)); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2136 |
__ exp_with_fallback(0); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2137 |
__ ret(0); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2138 |
} |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2139 |
{ |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2140 |
StubCodeMark mark(this, "StubRoutines", "pow"); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2141 |
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc(); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2142 |
|
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2143 |
__ fld_d(Address(rsp, 12)); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2144 |
__ fld_d(Address(rsp, 4)); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2145 |
__ pow_with_fallback(0); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2146 |
__ ret(0); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2147 |
} |
4645
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2148 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2149 |
|
14132 | 2150 |
// AES intrinsic stubs |
2151 |
// Size in bytes of one AES block; the CBC loop advances its position and
// decrements its remaining length by this amount per iteration.
enum {
  AESBlockSize = 16
};
|
2152 |
||
2153 |
// Emits the 16-byte constant used as the pshufb mask for key loading and
// returns the address of its first byte. The data is aligned to 16 bytes.
address generate_key_shuffle_mask() {
  // The four 32-bit words of the mask, in emission order.
  const int mask_words[] = { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };
  const int mask_word_count = (int)(sizeof(mask_words) / sizeof(mask_words[0]));

  __ align(16);
  StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
  address start = __ pc();
  for (int i = 0; i < mask_word_count; i++) {
    __ emit_data(mask_words[i], relocInfo::none, 0);
  }
  return start;
}
|
2163 |
||
2164 |
// Utility routine for loading a 128-bit key word in little endian format |
|
2165 |
// can optionally specify that the shuffle mask is already in an xmmregister |
|
2166 |
// Loads 16 bytes from [key + offset] into xmmdst (unaligned load) and then
// byte-shuffles them with the key shuffle mask.
//
//   xmmdst        - destination XMM register
//   key           - register holding the base address of the key array
//   offset        - byte offset of the key word to load
//   xmm_shuf_mask - register already holding the shuffle mask; when omitted
//                   (NULL) the mask is taken from memory at
//                   StubRoutines::x86::key_shuffle_mask_addr()
//
// NOTE(review): NULL doubles as the "no mask register supplied" sentinel.
// Confirm that no real XMM register value compares equal to NULL; such a
// register passed as the mask would silently take the memory-operand path.
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
  __ movdqu(xmmdst, Address(key, offset));

  if (xmm_shuf_mask == NULL) {
    // No preloaded mask: shuffle against the constant in memory.
    __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    return;
  }

  __ pshufb(xmmdst, xmm_shuf_mask);
}
|
2174 |
||
2175 |
// aesenc using specified key+offset |
|
2176 |
// can optionally specify that the shuffle mask is already in an xmmregister |
|
2177 |
// Emits one aesenc round on xmmdst using the key word at [key + offset].
// The key word is first loaded (and shuffled) into xmmtmp via load_key, so
// xmmtmp is overwritten. xmm_shuf_mask is forwarded to load_key unchanged.
void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
  // Stage the round key in the scratch register ...
  load_key(xmmtmp, key, offset, xmm_shuf_mask);
  // ... then apply it to the running state.
  __ aesenc(xmmdst, xmmtmp);
}
|
2181 |
||
2182 |
// aesdec using specified key+offset |
|
2183 |
// can optionally specify that the shuffle mask is already in an xmmregister |
|
2184 |
// Emits one aesdec round on xmmdst using the key word at [key + offset].
// The key word is first loaded (and shuffled) into xmmtmp via load_key, so
// xmmtmp is overwritten. xmm_shuf_mask is forwarded to load_key unchanged.
void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
  // Stage the round key in the scratch register ...
  load_key(xmmtmp, key, offset, xmm_shuf_mask);
  // ... then apply it to the running state.
  __ aesdec(xmmdst, xmmtmp);
}
|
2188 |
||
2189 |
||
2190 |
// Arguments (passed on the stack and read relative to rbp after enter()):
//
// Inputs:
//   [rbp +  8]  - source byte array address
//   [rbp + 12]  - destination byte array address
//   [rbp + 16]  - K (key) in little endian int array
//
|
2197 |
// Generates the stub that AES-encrypts a single 16-byte block and returns
// the stub's start address.
//
// The generated code reads its three arguments (source, destination, key
// array) from the stack relative to rbp, performs the rounds selected by the
// key array length (44, 52 or 60 ints), stores the 16-byte result at the
// destination, and returns 0 in rax.
address generate_aescrypt_encryptBlock() {
  assert(UseAES, "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
  Label L_doLast;
  address start = __ pc();

  // Incoming stack arguments, addressed off rbp once enter() has run.
  const Address from_param(rbp, 8+0);
  const Address to_param  (rbp, 8+4);
  const Address key_param (rbp, 8+8);

  // 'from' and 'to' share rdx: the input block is read into an XMM register
  // before rdx is reloaded with the destination address.
  const Register from   = rdx;   // source array address
  const Register to     = rdx;   // destination array address
  const Register key    = rcx;   // key array address
  const Register keylen = rax;

  const XMMRegister xmm_state         = xmm0;   // block being transformed
  const XMMRegister xmm_key_shuf_mask = xmm1;
  // Scratch registers that hold the round keys currently in flight.
  const XMMRegister xmm_rk[4] = { xmm2, xmm3, xmm4, xmm5 };

  __ enter();   // required for proper stackwalking of RuntimeStub frame
  __ movptr(from, from_param);
  __ movptr(key, key_param);

  // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
  __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  __ movdqu(xmm_state, Address(from, 0));   // get 16 bytes of input
  __ movptr(to, to_param);

  // For encryption, the java expanded key ordering is just what we need.

  // Initial xor with the key word at offset 0x00.
  load_key(xmm_rk[0], key, 0x00, xmm_key_shuf_mask);
  __ pxor(xmm_state, xmm_rk[0]);

  // Key words 0x10..0x40, then 0x50..0x80: load four, then apply four.
  for (int batch_base = 0x10; batch_base <= 0x50; batch_base += 0x40) {
    for (int i = 0; i < 4; i++) {
      load_key(xmm_rk[i], key, batch_base + i * 0x10, xmm_key_shuf_mask);
    }
    for (int i = 0; i < 4; i++) {
      __ aesenc(xmm_state, xmm_rk[i]);
    }
  }

  // The last two key words of the 44-int schedule.
  load_key(xmm_rk[0], key, 0x90, xmm_key_shuf_mask);
  load_key(xmm_rk[1], key, 0xa0, xmm_key_shuf_mask);

  __ cmpl(keylen, 44);
  __ jccb(Assembler::equal, L_doLast);

  __ aesenc(xmm_state, xmm_rk[0]);
  __ aesenc(xmm_state, xmm_rk[1]);

  // The last two key words of the 52-int schedule.
  load_key(xmm_rk[0], key, 0xb0, xmm_key_shuf_mask);
  load_key(xmm_rk[1], key, 0xc0, xmm_key_shuf_mask);

  __ cmpl(keylen, 52);
  __ jccb(Assembler::equal, L_doLast);

  __ aesenc(xmm_state, xmm_rk[0]);
  __ aesenc(xmm_state, xmm_rk[1]);

  // The last two key words of the 60-int schedule.
  load_key(xmm_rk[0], key, 0xd0, xmm_key_shuf_mask);
  load_key(xmm_rk[1], key, 0xe0, xmm_key_shuf_mask);

  // Common tail: xmm_rk[0] / xmm_rk[1] hold the final two key words.
  __ BIND(L_doLast);
  __ aesenc(xmm_state, xmm_rk[0]);
  __ aesenclast(xmm_state, xmm_rk[1]);
  __ movdqu(Address(to, 0), xmm_state);   // store the result
  __ xorptr(rax, rax);                    // return 0
  __ leave();   // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
2286 |
||
2287 |
||
2288 |
// Arguments (passed on the stack and read relative to rbp after enter()):
//
// Inputs:
//   [rbp +  8]  - source byte array address
//   [rbp + 12]  - destination byte array address
//   [rbp + 16]  - K (key) in little endian int array
//
|
2295 |
// Generates the stub that AES-decrypts a single 16-byte block and returns
// the stub's start address.
//
// The generated code reads its three arguments (source, destination, key
// array) from the stack relative to rbp, performs the rounds selected by the
// key array length (44, 52 or 60 ints), stores the 16-byte result at the
// destination, and returns 0 in rax.
address generate_aescrypt_decryptBlock() {
  assert(UseAES, "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
  Label L_doLast;
  address start = __ pc();

  // Incoming stack arguments, addressed off rbp once enter() has run.
  const Address from_param(rbp, 8+0);
  const Address to_param  (rbp, 8+4);
  const Address key_param (rbp, 8+8);

  // 'from' and 'to' share rdx: the input block is read into an XMM register
  // before rdx is reloaded with the destination address.
  const Register from   = rdx;   // source array address
  const Register to     = rdx;   // destination array address
  const Register key    = rcx;   // key array address
  const Register keylen = rax;

  const XMMRegister xmm_state         = xmm0;   // block being transformed
  const XMMRegister xmm_key_shuf_mask = xmm1;
  // Scratch registers that hold the round keys currently in flight.
  // xmm_rk[2] additionally carries the 0x00 key word into the final round.
  const XMMRegister xmm_rk[4] = { xmm2, xmm3, xmm4, xmm5 };

  __ enter();   // required for proper stackwalking of RuntimeStub frame
  __ movptr(from, from_param);
  __ movptr(key, key_param);

  // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
  __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  __ movdqu(xmm_state, Address(from, 0));
  __ movptr(to, to_param);

  // For decryption the java expanded key ordering is rotated one position
  // from what we want, so we start from 0x10 here and hit 0x00 last.
  // We don't know if the key is aligned, hence not using load-execute form.
  for (int i = 0; i < 4; i++) {
    load_key(xmm_rk[i], key, 0x10 + i * 0x10, xmm_key_shuf_mask);
  }

  // The first key word is xor-ed in; the other three are aesdec rounds.
  __ pxor(xmm_state, xmm_rk[0]);
  for (int i = 1; i < 4; i++) {
    __ aesdec(xmm_state, xmm_rk[i]);
  }

  // Key words 0x50..0x80: load four, then apply four.
  for (int i = 0; i < 4; i++) {
    load_key(xmm_rk[i], key, 0x50 + i * 0x10, xmm_key_shuf_mask);
  }
  for (int i = 0; i < 4; i++) {
    __ aesdec(xmm_state, xmm_rk[i]);
  }

  // The last two key words of the 44-int schedule, plus the 0x00 word that
  // every key size uses for the final aesdeclast.
  load_key(xmm_rk[0], key, 0x90, xmm_key_shuf_mask);
  load_key(xmm_rk[1], key, 0xa0, xmm_key_shuf_mask);
  load_key(xmm_rk[2], key, 0x00, xmm_key_shuf_mask);

  __ cmpl(keylen, 44);
  __ jccb(Assembler::equal, L_doLast);

  __ aesdec(xmm_state, xmm_rk[0]);
  __ aesdec(xmm_state, xmm_rk[1]);

  // The last two key words of the 52-int schedule.
  load_key(xmm_rk[0], key, 0xb0, xmm_key_shuf_mask);
  load_key(xmm_rk[1], key, 0xc0, xmm_key_shuf_mask);

  __ cmpl(keylen, 52);
  __ jccb(Assembler::equal, L_doLast);

  __ aesdec(xmm_state, xmm_rk[0]);
  __ aesdec(xmm_state, xmm_rk[1]);

  // The last two key words of the 60-int schedule.
  load_key(xmm_rk[0], key, 0xd0, xmm_key_shuf_mask);
  load_key(xmm_rk[1], key, 0xe0, xmm_key_shuf_mask);

  // Common tail: xmm_rk[0] / xmm_rk[1] hold the final two regular key words.
  __ BIND(L_doLast);
  __ aesdec(xmm_state, xmm_rk[0]);
  __ aesdec(xmm_state, xmm_rk[1]);

  // for decryption the aesdeclast operation is always on key+0x00
  __ aesdeclast(xmm_state, xmm_rk[2]);
  __ movdqu(Address(to, 0), xmm_state);   // store the result
  __ xorptr(rax, rax);                    // return 0
  __ leave();   // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
2386 |
||
2387 |
void handleSOERegisters(bool saving) { |
|
2388 |
const int saveFrameSizeInBytes = 4 * wordSize; |
|
2389 |
const Address saved_rbx (rbp, -3 * wordSize); |
|
2390 |
const Address saved_rsi (rbp, -2 * wordSize); |
|
2391 |
const Address saved_rdi (rbp, -1 * wordSize); |
|
2392 |
||
2393 |
if (saving) { |
|
2394 |
__ subptr(rsp, saveFrameSizeInBytes); |
|
2395 |
__ movptr(saved_rsi, rsi); |
|
2396 |
__ movptr(saved_rdi, rdi); |
|
2397 |
__ movptr(saved_rbx, rbx); |
|
2398 |
} else { |
|
2399 |
// restoring |
|
2400 |
__ movptr(rsi, saved_rsi); |
|
2401 |
__ movptr(rdi, saved_rdi); |
|
2402 |
__ movptr(rbx, saved_rbx); |
|
2403 |
} |
|
2404 |
} |
|
2405 |
||
2406 |
// Arguments (passed on the stack and read relative to rbp after enter()):
//
// Inputs:
//   [rbp +  8]  - source byte array address
//   [rbp + 12]  - destination byte array address
//   [rbp + 16]  - K (key) in little endian int array
//   [rbp + 20]  - r vector byte array address
//   [rbp + 24]  - input length
//
// Output:
//   rax         - input length
//
|
14132 | 2418 |
address generate_cipherBlockChaining_encryptAESCrypt() { |
14834 | 2419 |
assert(UseAES, "need AES instructions and misaligned SSE support"); |
14132 | 2420 |
__ align(CodeEntryAlignment); |
2421 |
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); |
|
2422 |
address start = __ pc(); |
|
2423 |
||
2424 |
Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; |
|
2425 |
const Register from = rsi; // source array address |
|
2426 |
const Register to = rdx; // destination array address |
|
2427 |
const Register key = rcx; // key array address |
|
2428 |
const Register rvec = rdi; // r byte array initialized from initvector array address |
|
2429 |
// and left with the results of the last encryption block |
|
2430 |
const Register len_reg = rbx; // src len (must be multiple of blocksize 16) |
|
2431 |
const Register pos = rax; |
|
2432 |
||
2433 |
// xmm register assignments for the loops below |
|
2434 |
const XMMRegister xmm_result = xmm0; |
|
2435 |
const XMMRegister xmm_temp = xmm1; |
|
2436 |
// first 6 keys preloaded into xmm2-xmm7 |
|
2437 |
const int XMM_REG_NUM_KEY_FIRST = 2; |
|
2438 |
const int XMM_REG_NUM_KEY_LAST = 7; |
|
2439 |
const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); |
|
2440 |
||
2441 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
2442 |
handleSOERegisters(true /*saving*/); |
|
2443 |
||
2444 |
// load registers from incoming parameters |
|
2445 |
const Address from_param(rbp, 8+0); |
|
2446 |
const Address to_param (rbp, 8+4); |
|
2447 |
const Address key_param (rbp, 8+8); |
|
2448 |
const Address rvec_param (rbp, 8+12); |
|
2449 |
const Address len_param (rbp, 8+16); |
|
2450 |
__ movptr(from , from_param); |
|
2451 |
__ movptr(to , to_param); |
|
2452 |
__ movptr(key , key_param); |
|
2453 |
__ movptr(rvec , rvec_param); |
|
2454 |
__ movptr(len_reg , len_param); |
|
2455 |
||
2456 |
const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front |
|
2457 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2458 |
// load up xmm regs 2 thru 7 with keys 0-5 |
|
2459 |
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2460 |
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); |
|
2461 |
offset += 0x10; |
|
2462 |
} |
|
2463 |
||
2464 |
__ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec |
|
2465 |
||
2466 |
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) |
|
2467 |
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
|
2468 |
__ cmpl(rax, 44); |
|
2469 |
__ jcc(Assembler::notEqual, L_key_192_256); |
|
2470 |
||
2471 |
// 128 bit code follows here |
|
14834 | 2472 |
__ movl(pos, 0); |
14132 | 2473 |
__ align(OptoLoopAlignment); |
2474 |
__ BIND(L_loopTop_128); |
|
2475 |
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
|
2476 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2477 |
||
2478 |
__ pxor (xmm_result, xmm_key0); // do the aes rounds |
|
2479 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2480 |
__ aesenc(xmm_result, as_XMMRegister(rnum)); |
|
2481 |
} |
|
2482 |
for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) { |
|
2483 |
aes_enc_key(xmm_result, xmm_temp, key, key_offset); |
|
2484 |
} |
|
2485 |
load_key(xmm_temp, key, 0xa0); |
|
2486 |
__ aesenclast(xmm_result, xmm_temp); |
|
2487 |
||
2488 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2489 |
// no need to store r to memory until we exit |
|
2490 |
__ addptr(pos, AESBlockSize); |
|
2491 |
__ subptr(len_reg, AESBlockSize); |
|
2492 |
__ jcc(Assembler::notEqual, L_loopTop_128); |
|
2493 |
||
2494 |
__ BIND(L_exit); |
|
2495 |
__ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object |
|
2496 |
||
2497 |
handleSOERegisters(false /*restoring*/); |
|
22505 | 2498 |
__ movptr(rax, len_param); // return length |
14132 | 2499 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
2500 |
__ ret(0); |
|
2501 |
||
14834 | 2502 |
__ BIND(L_key_192_256); |
2503 |
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) |
|
14132 | 2504 |
__ cmpl(rax, 52); |
2505 |
__ jcc(Assembler::notEqual, L_key_256); |
|
2506 |
||
2507 |
// 192-bit code follows here (could be changed to use more xmm registers) |
|
14834 | 2508 |
__ movl(pos, 0); |
2509 |
__ align(OptoLoopAlignment); |
|
2510 |
__ BIND(L_loopTop_192); |
|
14132 | 2511 |
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2512 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2513 |
||
2514 |
__ pxor (xmm_result, xmm_key0); // do the aes rounds |
|
2515 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2516 |
__ aesenc(xmm_result, as_XMMRegister(rnum)); |
|
2517 |
} |
|
2518 |
for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) { |
|
2519 |
aes_enc_key(xmm_result, xmm_temp, key, key_offset); |
|
2520 |
} |
|
2521 |
load_key(xmm_temp, key, 0xc0); |
|
2522 |
__ aesenclast(xmm_result, xmm_temp); |
|
2523 |
||
2524 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2525 |
// no need to store r to memory until we exit |
|
2526 |
__ addptr(pos, AESBlockSize); |
|
2527 |
__ subptr(len_reg, AESBlockSize); |
|
2528 |
__ jcc(Assembler::notEqual, L_loopTop_192); |
|
2529 |
__ jmp(L_exit); |
|
2530 |
||
14834 | 2531 |
__ BIND(L_key_256); |
14132 | 2532 |
// 256-bit code follows here (could be changed to use more xmm registers) |
14834 | 2533 |
__ movl(pos, 0); |
2534 |
__ align(OptoLoopAlignment); |
|
2535 |
__ BIND(L_loopTop_256); |
|
14132 | 2536 |
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2537 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2538 |
||
2539 |
__ pxor (xmm_result, xmm_key0); // do the aes rounds |
|
2540 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2541 |
__ aesenc(xmm_result, as_XMMRegister(rnum)); |
|
2542 |
} |
|
2543 |
for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) { |
|
2544 |
aes_enc_key(xmm_result, xmm_temp, key, key_offset); |
|
2545 |
} |
|
2546 |
load_key(xmm_temp, key, 0xe0); |
|
2547 |
__ aesenclast(xmm_result, xmm_temp); |
|
2548 |
||
2549 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2550 |
// no need to store r to memory until we exit |
|
2551 |
__ addptr(pos, AESBlockSize); |
|
2552 |
__ subptr(len_reg, AESBlockSize); |
|
2553 |
__ jcc(Assembler::notEqual, L_loopTop_256); |
|
2554 |
__ jmp(L_exit); |
|
2555 |
||
2556 |
return start; |
|
2557 |
} |
|
2558 |
||
2559 |
||
2560 |
// CBC AES Decryption. |
|
2561 |
// In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time. |
|
2562 |
// |
|
2563 |
// Arguments: |
|
2564 |
// |
|
2565 |
// Inputs: |
|
2566 |
// c_rarg0 - source byte array address |
|
2567 |
// c_rarg1 - destination byte array address |
|
2568 |
// c_rarg2 - K (key) in little endian int array |
|
2569 |
// c_rarg3 - r vector byte array address |
|
2570 |
// c_rarg4 - input length |
|
2571 |
// |
|
22505 | 2572 |
// Output: |
2573 |
// rax - input length |
|
2574 |
// |
|
14132 | 2575 |
|
2576 |
address generate_cipherBlockChaining_decryptAESCrypt() { |
|
14834 | 2577 |
assert(UseAES, "need AES instructions and misaligned SSE support"); |
14132 | 2578 |
__ align(CodeEntryAlignment); |
2579 |
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); |
|
2580 |
address start = __ pc(); |
|
2581 |
||
2582 |
Label L_exit, L_key_192_256, L_key_256; |
|
2583 |
Label L_singleBlock_loopTop_128; |
|
2584 |
Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256; |
|
2585 |
const Register from = rsi; // source array address |
|
2586 |
const Register to = rdx; // destination array address |
|
2587 |
const Register key = rcx; // key array address |
|
2588 |
const Register rvec = rdi; // r byte array initialized from initvector array address |
|
2589 |
// and left with the results of the last encryption block |
|
2590 |
const Register len_reg = rbx; // src len (must be multiple of blocksize 16) |
|
2591 |
const Register pos = rax; |
|
2592 |
||
2593 |
// xmm register assignments for the loops below |
|
2594 |
const XMMRegister xmm_result = xmm0; |
|
2595 |
const XMMRegister xmm_temp = xmm1; |
|
2596 |
// first 6 keys preloaded into xmm2-xmm7 |
|
2597 |
const int XMM_REG_NUM_KEY_FIRST = 2; |
|
2598 |
const int XMM_REG_NUM_KEY_LAST = 7; |
|
2599 |
const int FIRST_NON_REG_KEY_offset = 0x70; |
|
2600 |
const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); |
|
2601 |
||
2602 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
2603 |
handleSOERegisters(true /*saving*/); |
|
2604 |
||
2605 |
// load registers from incoming parameters |
|
2606 |
const Address from_param(rbp, 8+0); |
|
2607 |
const Address to_param (rbp, 8+4); |
|
2608 |
const Address key_param (rbp, 8+8); |
|
2609 |
const Address rvec_param (rbp, 8+12); |
|
2610 |
const Address len_param (rbp, 8+16); |
|
2611 |
__ movptr(from , from_param); |
|
2612 |
__ movptr(to , to_param); |
|
2613 |
__ movptr(key , key_param); |
|
2614 |
__ movptr(rvec , rvec_param); |
|
2615 |
__ movptr(len_reg , len_param); |
|
2616 |
||
2617 |
// the java expanded key ordering is rotated one position from what we want |
|
2618 |
// so we start from 0x10 here and hit 0x00 last |
|
2619 |
const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front |
|
2620 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2621 |
// load up xmm regs 2 thru 6 with first 5 keys |
|
2622 |
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2623 |
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); |
|
2624 |
offset += 0x10; |
|
2625 |
} |
|
2626 |
||
2627 |
// inside here, use the rvec register to point to previous block cipher |
|
2628 |
// with which we xor at the end of each newly decrypted block |
|
2629 |
const Register prev_block_cipher_ptr = rvec; |
|
2630 |
||
2631 |
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) |
|
2632 |
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
|
2633 |
__ cmpl(rax, 44); |
|
2634 |
__ jcc(Assembler::notEqual, L_key_192_256); |
|
2635 |
||
2636 |
||
2637 |
// 128-bit code follows here, parallelized |
|
14834 | 2638 |
__ movl(pos, 0); |
2639 |
__ align(OptoLoopAlignment); |
|
2640 |
__ BIND(L_singleBlock_loopTop_128); |
|
14132 | 2641 |
__ cmpptr(len_reg, 0); // any blocks left?? |
2642 |
__ jcc(Assembler::equal, L_exit); |
|
2643 |
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
|
2644 |
__ pxor (xmm_result, xmm_key_first); // do the aes dec rounds |
|
2645 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2646 |
__ aesdec(xmm_result, as_XMMRegister(rnum)); |
|
2647 |
} |
|
2648 |
for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) { // 128-bit runs up to key offset a0 |
|
2649 |
aes_dec_key(xmm_result, xmm_temp, key, key_offset); |
|
2650 |
} |
|
2651 |
load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 |
|
2652 |
__ aesdeclast(xmm_result, xmm_temp); |
|
2653 |
__ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); |
|
2654 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2655 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2656 |
// no need to store r to memory until we exit |
|
2657 |
__ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr |
|
2658 |
__ addptr(pos, AESBlockSize); |
|
2659 |
__ subptr(len_reg, AESBlockSize); |
|
2660 |
__ jmp(L_singleBlock_loopTop_128); |
|
2661 |
||
2662 |
||
2663 |
__ BIND(L_exit); |
|
2664 |
__ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); |
|
2665 |
__ movptr(rvec , rvec_param); // restore this since used in loop |
|
2666 |
__ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object |
|
2667 |
handleSOERegisters(false /*restoring*/); |
|
22505 | 2668 |
__ movptr(rax, len_param); // return length |
14132 | 2669 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
2670 |
__ ret(0); |
|
2671 |
||
2672 |
||
2673 |
__ BIND(L_key_192_256); |
|
2674 |
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) |
|
2675 |
__ cmpl(rax, 52); |
|
2676 |
__ jcc(Assembler::notEqual, L_key_256); |
|
2677 |
||
2678 |
// 192-bit code follows here (could be optimized to use parallelism) |
|
14834 | 2679 |
__ movl(pos, 0); |
14132 | 2680 |
__ align(OptoLoopAlignment); |
2681 |
__ BIND(L_singleBlock_loopTop_192); |
|
2682 |
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
|
2683 |
__ pxor (xmm_result, xmm_key_first); // do the aes dec rounds |
|
2684 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2685 |
__ aesdec(xmm_result, as_XMMRegister(rnum)); |
|
2686 |
} |
|
2687 |
for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) { // 192-bit runs up to key offset c0 |
|
2688 |
aes_dec_key(xmm_result, xmm_temp, key, key_offset); |
|
2689 |
} |
|
2690 |
load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 |
|
2691 |
__ aesdeclast(xmm_result, xmm_temp); |
|
2692 |
__ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); |
|
2693 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2694 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2695 |
// no need to store r to memory until we exit |
|
2696 |
__ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr |
|
2697 |
__ addptr(pos, AESBlockSize); |
|
2698 |
__ subptr(len_reg, AESBlockSize); |
|
2699 |
__ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); |
|
2700 |
__ jmp(L_exit); |
|
2701 |
||
2702 |
__ BIND(L_key_256); |
|
2703 |
// 256-bit code follows here (could be optimized to use parallelism) |
|
14834 | 2704 |
__ movl(pos, 0); |
14132 | 2705 |
__ align(OptoLoopAlignment); |
2706 |
__ BIND(L_singleBlock_loopTop_256); |
|
2707 |
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
|
2708 |
__ pxor (xmm_result, xmm_key_first); // do the aes dec rounds |
|
2709 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2710 |
__ aesdec(xmm_result, as_XMMRegister(rnum)); |
|
2711 |
} |
|
2712 |
for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0 |
|
2713 |
aes_dec_key(xmm_result, xmm_temp, key, key_offset); |
|
2714 |
} |
|
2715 |
load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 |
|
2716 |
__ aesdeclast(xmm_result, xmm_temp); |
|
2717 |
__ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); |
|
2718 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2719 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2720 |
// no need to store r to memory until we exit |
|
2721 |
__ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr |
|
2722 |
__ addptr(pos, AESBlockSize); |
|
2723 |
__ subptr(len_reg, AESBlockSize); |
|
2724 |
__ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); |
|
2725 |
__ jmp(L_exit); |
|
2726 |
||
2727 |
return start; |
|
2728 |
} |
|
2729 |
||
31404
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2730 |
// byte swap x86 long |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2731 |
address generate_ghash_long_swap_mask() { |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2732 |
__ align(CodeEntryAlignment); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2733 |
StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2734 |
address start = __ pc(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2735 |
__ emit_data(0x0b0a0908, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2736 |
__ emit_data(0x0f0e0d0c, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2737 |
__ emit_data(0x03020100, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2738 |
__ emit_data(0x07060504, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2739 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2740 |
return start; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2741 |
} |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2742 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2743 |
// byte swap x86 byte array |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2744 |
address generate_ghash_byte_swap_mask() { |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2745 |
__ align(CodeEntryAlignment); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2746 |
StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2747 |
address start = __ pc(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2748 |
__ emit_data(0x0c0d0e0f, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2749 |
__ emit_data(0x08090a0b, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2750 |
__ emit_data(0x04050607, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2751 |
__ emit_data(0x00010203, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2752 |
return start; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2753 |
} |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2754 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2755 |
/* Single and multi-block ghash operations */ |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2756 |
address generate_ghash_processBlocks() { |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2757 |
assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support"); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2758 |
__ align(CodeEntryAlignment); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2759 |
Label L_ghash_loop, L_exit; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2760 |
StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2761 |
address start = __ pc(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2762 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2763 |
const Register state = rdi; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2764 |
const Register subkeyH = rsi; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2765 |
const Register data = rdx; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2766 |
const Register blocks = rcx; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2767 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2768 |
const Address state_param(rbp, 8+0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2769 |
const Address subkeyH_param(rbp, 8+4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2770 |
const Address data_param(rbp, 8+8); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2771 |
const Address blocks_param(rbp, 8+12); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2772 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2773 |
const XMMRegister xmm_temp0 = xmm0; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2774 |
const XMMRegister xmm_temp1 = xmm1; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2775 |
const XMMRegister xmm_temp2 = xmm2; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2776 |
const XMMRegister xmm_temp3 = xmm3; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2777 |
const XMMRegister xmm_temp4 = xmm4; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2778 |
const XMMRegister xmm_temp5 = xmm5; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2779 |
const XMMRegister xmm_temp6 = xmm6; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2780 |
const XMMRegister xmm_temp7 = xmm7; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2781 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2782 |
__ enter(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2783 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2784 |
__ movptr(state, state_param); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2785 |
__ movptr(subkeyH, subkeyH_param); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2786 |
__ movptr(data, data_param); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2787 |
__ movptr(blocks, blocks_param); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2788 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2789 |
__ movdqu(xmm_temp0, Address(state, 0)); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2790 |
__ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2791 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2792 |
__ movdqu(xmm_temp1, Address(subkeyH, 0)); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2793 |
__ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2794 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2795 |
__ BIND(L_ghash_loop); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2796 |
__ movdqu(xmm_temp2, Address(data, 0)); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2797 |
__ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2798 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2799 |
__ pxor(xmm_temp0, xmm_temp2); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2800 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2801 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2802 |
// Multiply with the hash key |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2803 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2804 |
__ movdqu(xmm_temp3, xmm_temp0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2805 |
__ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2806 |
__ movdqu(xmm_temp4, xmm_temp0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2807 |
__ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2808 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2809 |
__ movdqu(xmm_temp5, xmm_temp0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2810 |
__ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2811 |
__ movdqu(xmm_temp6, xmm_temp0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2812 |
__ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2813 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2814 |
__ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2815 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2816 |
__ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2817 |
__ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2818 |
__ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2819 |
__ pxor(xmm_temp3, xmm_temp5); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2820 |
__ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2821 |
// of the carry-less multiplication of |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2822 |
// xmm0 by xmm1. |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2823 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2824 |
// We shift the result of the multiplication by one bit position |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2825 |
// to the left to cope for the fact that the bits are reversed. |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2826 |
__ movdqu(xmm_temp7, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2827 |
__ movdqu(xmm_temp4, xmm_temp6); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2828 |
__ pslld (xmm_temp3, 1); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2829 |
__ pslld(xmm_temp6, 1); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2830 |
__ psrld(xmm_temp7, 31); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2831 |
__ psrld(xmm_temp4, 31); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2832 |
__ movdqu(xmm_temp5, xmm_temp7); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2833 |
__ pslldq(xmm_temp4, 4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2834 |
__ pslldq(xmm_temp7, 4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2835 |
__ psrldq(xmm_temp5, 12); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2836 |
__ por(xmm_temp3, xmm_temp7); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2837 |
__ por(xmm_temp6, xmm_temp4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2838 |
__ por(xmm_temp6, xmm_temp5); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2839 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2840 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2841 |
// First phase of the reduction |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2842 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2843 |
// Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2844 |
// independently. |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2845 |
__ movdqu(xmm_temp7, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2846 |
__ movdqu(xmm_temp4, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2847 |
__ movdqu(xmm_temp5, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2848 |
__ pslld(xmm_temp7, 31); // packed right shift shifting << 31 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2849 |
__ pslld(xmm_temp4, 30); // packed right shift shifting << 30 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2850 |
__ pslld(xmm_temp5, 25); // packed right shift shifting << 25 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2851 |
__ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2852 |
__ pxor(xmm_temp7, xmm_temp5); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2853 |
__ movdqu(xmm_temp4, xmm_temp7); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2854 |
__ pslldq(xmm_temp7, 12); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2855 |
__ psrldq(xmm_temp4, 4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2856 |
__ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2857 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2858 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2859 |
// Second phase of the reduction |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2860 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2861 |
// Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2862 |
// shift operations. |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2863 |
__ movdqu(xmm_temp2, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2864 |
__ movdqu(xmm_temp7, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2865 |
__ movdqu(xmm_temp5, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2866 |
__ psrld(xmm_temp2, 1); // packed left shifting >> 1 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2867 |
__ psrld(xmm_temp7, 2); // packed left shifting >> 2 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2868 |
__ psrld(xmm_temp5, 7); // packed left shifting >> 7 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2869 |
__ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2870 |
__ pxor(xmm_temp2, xmm_temp5); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2871 |
__ pxor(xmm_temp2, xmm_temp4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2872 |
__ pxor(xmm_temp3, xmm_temp2); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2873 |
__ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2874 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2875 |
__ decrement(blocks); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2876 |
__ jcc(Assembler::zero, L_exit); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2877 |
__ movdqu(xmm_temp0, xmm_temp6); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2878 |
__ addptr(data, 16); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2879 |
__ jmp(L_ghash_loop); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2880 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2881 |
__ BIND(L_exit); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2882 |
// Byte swap 16-byte result |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2883 |
__ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2884 |
__ movdqu(Address(state, 0), xmm_temp6); // store the result |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2885 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2886 |
__ leave(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2887 |
__ ret(0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2888 |
return start; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2889 |
} |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2890 |
|
18507
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2891 |
/** |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2892 |
* Arguments: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2893 |
* |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2894 |
* Inputs: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2895 |
* rsp(4) - int crc |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2896 |
* rsp(8) - byte* buf |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2897 |
* rsp(12) - int length |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2898 |
* |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2899 |
* Ouput: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2900 |
* rax - int crc result |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2901 |
*/ |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2902 |
address generate_updateBytesCRC32() { |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2903 |
assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2904 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2905 |
__ align(CodeEntryAlignment); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2906 |
StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2907 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2908 |
address start = __ pc(); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2909 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2910 |
const Register crc = rdx; // crc |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2911 |
const Register buf = rsi; // source java byte array address |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2912 |
const Register len = rcx; // length |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2913 |
const Register table = rdi; // crc_table address (reuse register) |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2914 |
const Register tmp = rbx; |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2915 |
assert_different_registers(crc, buf, len, table, tmp, rax); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2916 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2917 |
BLOCK_COMMENT("Entry:"); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2918 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2919 |
__ push(rsi); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2920 |
__ push(rdi); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2921 |
__ push(rbx); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2922 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2923 |
Address crc_arg(rbp, 8 + 0); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2924 |
Address buf_arg(rbp, 8 + 4); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2925 |
Address len_arg(rbp, 8 + 8); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2926 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2927 |
// Load up: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2928 |
__ movl(crc, crc_arg); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2929 |
__ movptr(buf, buf_arg); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2930 |
__ movl(len, len_arg); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2931 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2932 |
__ kernel_crc32(crc, buf, len, table, tmp); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2933 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2934 |
__ movl(rax, crc); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2935 |
__ pop(rbx); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2936 |
__ pop(rdi); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2937 |
__ pop(rsi); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2938 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2939 |
__ ret(0); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2940 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2941 |
return start; |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2942 |
} |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2943 |
|
18740 | 2944 |
// Safefetch stubs. |
2945 |
void generate_safefetch(const char* name, int size, address* entry, |
|
2946 |
address* fault_pc, address* continuation_pc) { |
|
2947 |
// safefetch signatures: |
|
2948 |
// int SafeFetch32(int* adr, int errValue); |
|
2949 |
// intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); |
|
2950 |
||
2951 |
StubCodeMark mark(this, "StubRoutines", name); |
|
2952 |
||
2953 |
// Entry point, pc or function descriptor. |
|
2954 |
*entry = __ pc(); |
|
2955 |
||
2956 |
__ movl(rax, Address(rsp, 0x8)); |
|
2957 |
__ movl(rcx, Address(rsp, 0x4)); |
|
2958 |
// Load *adr into eax, may fault. |
|
2959 |
*fault_pc = __ pc(); |
|
2960 |
switch (size) { |
|
2961 |
case 4: |
|
2962 |
// int32_t |
|
2963 |
__ movl(rax, Address(rcx, 0)); |
|
2964 |
break; |
|
2965 |
case 8: |
|
2966 |
// int64_t |
|
2967 |
Unimplemented(); |
|
2968 |
break; |
|
2969 |
default: |
|
2970 |
ShouldNotReachHere(); |
|
2971 |
} |
|
2972 |
||
2973 |
// Return errValue or *adr. |
|
2974 |
*continuation_pc = __ pc(); |
|
2975 |
__ ret(0); |
|
2976 |
} |
|
14132 | 2977 |
|
1 | 2978 |
public: |
2979 |
// Frame layout at the time of a blocking runtime call, expressed as slot
// indices (generate_throw_exception scales them by wordSize off rsp).
// Only callee-saved registers need a slot here: a compiler that expects
// every register to survive must supply its own continuation point.
enum layout {
  thread_off = 0,  // last_java_sp
  arg1_off,        // optional first register argument
  arg2_off,        // optional second register argument
  rbp_off,         // callee saved register
  ret_pc,          // return address slot
  framesize        // total number of slots in the frame
};
|
2991 |
||
2992 |
private: |
|
2993 |
||
2994 |
#undef __ |
|
2995 |
#define __ masm-> |
|
2996 |
||
2997 |
//------------------------------------------------------------------------------------------------------------------------ |
|
2998 |
// Continuation point for throwing of implicit exceptions that are not handled in |
|
2999 |
// the current activation. Fabricates an exception oop and initiates normal |
|
3000 |
// exception dispatching in this frame. |
|
3001 |
// |
|
3002 |
// Previously the compiler (c2) allowed for callee save registers on Java calls. |
|
3003 |
// This is no longer true after adapter frames were removed but could possibly |
|
3004 |
// be brought back in the future if the interpreter code was reworked and it |
|
3005 |
// was deemed worthwhile. The comment below was left to describe what must |
|
3006 |
// happen here if callee saves were resurrected. As it stands now this stub |
|
3007 |
// could actually be a vanilla BufferBlob and have now oopMap at all. |
|
3008 |
// Since it doesn't make much difference we've chosen to leave it the |
|
3009 |
// way it was in the callee save days and keep the comment. |
|
3010 |
||
3011 |
// If we need to preserve callee-saved values we need a callee-saved oop map and |
|
3012 |
// therefore have to make these stubs into RuntimeStubs rather than BufferBlobs. |
|
3013 |
// If the compiler needs all registers to be preserved between the fault |
|
3014 |
// point and the exception handler then it must assume responsibility for that in |
|
3015 |
// AbstractCompiler::continuation_for_implicit_null_exception or |
|
3016 |
// continuation_for_implicit_division_by_zero_exception. All other implicit |
|
3017 |
// exceptions (e.g., NullPointerException or AbstractMethodError on entry) are |
|
3018 |
// either at call sites or otherwise assume that stack unwinding will be initiated, |
|
3019 |
// so caller saved registers were assumed volatile in the compiler. |
|
3020 |
address generate_throw_exception(const char* name, address runtime_entry, |
|
10545 | 3021 |
Register arg1 = noreg, Register arg2 = noreg) { |
1 | 3022 |
|
3023 |
int insts_size = 256; |
|
3024 |
int locs_size = 32; |
|
3025 |
||
3026 |
CodeBuffer code(name, insts_size, locs_size); |
|
3027 |
OopMapSet* oop_maps = new OopMapSet(); |
|
3028 |
MacroAssembler* masm = new MacroAssembler(&code); |
|
3029 |
||
3030 |
address start = __ pc(); |
|
3031 |
||
3032 |
// This is an inlined and slightly modified version of call_VM |
|
3033 |
// which has the ability to fetch the return PC out of |
|
3034 |
// thread-local storage and also sets up last_Java_sp slightly |
|
3035 |
// differently than the real call_VM |
|
3036 |
Register java_thread = rbx; |
|
3037 |
__ get_thread(java_thread); |
|
3038 |
||
3039 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
3040 |
||
3041 |
// pc and rbp, already pushed |
|
1066 | 3042 |
__ subptr(rsp, (framesize-2) * wordSize); // prolog |
1 | 3043 |
|
3044 |
// Frame is now completed as far as size and linkage. |
|
3045 |
||
3046 |
int frame_complete = __ pc() - start; |
|
3047 |
||
3048 |
// push java thread (becomes first argument of C function) |
|
1066 | 3049 |
__ movptr(Address(rsp, thread_off * wordSize), java_thread); |
10004
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3050 |
if (arg1 != noreg) { |
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3051 |
__ movptr(Address(rsp, arg1_off * wordSize), arg1); |
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3052 |
} |
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3053 |
if (arg2 != noreg) { |
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3054 |
assert(arg1 != noreg, "missing reg arg"); |
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3055 |
__ movptr(Address(rsp, arg2_off * wordSize), arg2); |
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3056 |
} |
1 | 3057 |
|
3058 |
// Set up last_Java_sp and last_Java_fp |
|
3059 |
__ set_last_Java_frame(java_thread, rsp, rbp, NULL); |
|
3060 |
||
3061 |
// Call runtime |
|
3062 |
BLOCK_COMMENT("call runtime_entry"); |
|
3063 |
__ call(RuntimeAddress(runtime_entry)); |
|
3064 |
// Generate oop map |
|
3065 |
OopMap* map = new OopMap(framesize, 0); |
|
3066 |
oop_maps->add_gc_map(__ pc() - start, map); |
|
3067 |
||
3068 |
// restore the thread (cannot use the pushed argument since arguments |
|
3069 |
// may be overwritten by C code generated by an optimizing compiler); |
|
3070 |
// however can use the register value directly if it is callee saved. |
|
3071 |
__ get_thread(java_thread); |
|
3072 |
||
3073 |
__ reset_last_Java_frame(java_thread, true, false); |
|
3074 |
||
3075 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
|
3076 |
||
3077 |
// check for pending exceptions |
|
3078 |
#ifdef ASSERT |
|
3079 |
Label L; |
|
1066 | 3080 |
__ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); |
1 | 3081 |
__ jcc(Assembler::notEqual, L); |
3082 |
__ should_not_reach_here(); |
|
3083 |
__ bind(L); |
|
3084 |
#endif /* ASSERT */ |
|
3085 |
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); |
|
3086 |
||
3087 |
||
3088 |
RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false); |
|
3089 |
return stub->entry_point(); |
|
3090 |
} |
|
3091 |
||
3092 |
||
3093 |
// Initializes the platform-dependent FPU/SSE control-word constants and the
// subnormal bias constants held in StubRoutines.
void create_control_words() {
  // ---- x87 FPU control words ----

  // Round to nearest, 53-bit mode, exceptions masked
  StubRoutines::_fpu_cntrl_wrd_std   = 0x027F;
  // Round to zero, 53-bit mode, exceptions masked
  // NOTE(review): bits 9:8 of 0x0D7F are 01b; the Intel SDM lists that
  // precision-control encoding as reserved (53-bit is 10b) -- confirm.
  StubRoutines::_fpu_cntrl_wrd_trunc = 0x0D7F;
  // Round to nearest, 24-bit mode, exceptions masked
  StubRoutines::_fpu_cntrl_wrd_24    = 0x007F;
  // Round to nearest, 64-bit mode, exceptions masked
  StubRoutines::_fpu_cntrl_wrd_64    = 0x037F;

  // ---- SSE control/status register ----

  // Round to nearest, exceptions masked
  // NOTE(review): the previous comment here said "64-bit mode", apparently
  // copied from the x87 word above; MXCSR has no precision field -- confirm.
  StubRoutines::_mxcsr_std           = 0x1F80;

  // ---- 80-bit bias constants ----

  // Note: the following two constants are 80-bit values
  //       layout is critical for correct loading by FPU.

  // Bias for strict fp multiply/divide
  // 2^(-15360) == 0x03ff 8000 0000 0000 0000
  StubRoutines::_fpu_subnormal_bias1[0]= 0x00000000;
  StubRoutines::_fpu_subnormal_bias1[1]= 0x80000000;
  StubRoutines::_fpu_subnormal_bias1[2]= 0x03ff;

  // Un-Bias for strict fp multiply/divide
  // 2^(+15360) == 0x7bff 8000 0000 0000 0000
  StubRoutines::_fpu_subnormal_bias2[0]= 0x00000000;
  StubRoutines::_fpu_subnormal_bias2[1]= 0x80000000;
  StubRoutines::_fpu_subnormal_bias2[2]= 0x7bff;
}
|
3115 |
||
3116 |
//--------------------------------------------------------------------------- |
|
3117 |
// Initialization |
|
3118 |
||
3119 |
void generate_initial() { |
|
3120 |
// Generates all stubs and initializes the entry points |
|
3121 |
||
3122 |
//------------------------------------------------------------------------------------------------------------------------ |
|
3123 |
// entry points that exist in all platforms |
|
3124 |
// Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than |
|
3125 |
// the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp. |
|
3126 |
StubRoutines::_forward_exception_entry = generate_forward_exception(); |
|
3127 |
||
3128 |
StubRoutines::_call_stub_entry = |
|
3129 |
generate_call_stub(StubRoutines::_call_stub_return_address); |
|
3130 |
// is referenced by megamorphic call |
|
3131 |
StubRoutines::_catch_exception_entry = generate_catch_exception(); |
|
3132 |
||
3133 |
// These are currently used by Solaris/Intel |
|
3134 |
StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); |
|
3135 |
||
3136 |
StubRoutines::_handler_for_unsafe_access_entry = |
|
3137 |
generate_handler_for_unsafe_access(); |
|
3138 |
||
3139 |
// platform dependent |
|
3140 |
create_control_words(); |
|
3141 |
||
1066 | 3142 |
StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr(); |
3143 |
StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = generate_verify_fpu_cntrl_wrd(); |
|
1 | 3144 |
StubRoutines::_d2i_wrapper = generate_d2i_wrapper(T_INT, |
3145 |
CAST_FROM_FN_PTR(address, SharedRuntime::d2i)); |
|
3146 |
StubRoutines::_d2l_wrapper = generate_d2i_wrapper(T_LONG, |
|
3147 |
CAST_FROM_FN_PTR(address, SharedRuntime::d2l)); |
|
10004
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3148 |
|
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3149 |
// Build this early so it's available for the interpreter |
11411 | 3150 |
StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); |
18507
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3151 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3152 |
if (UseCRC32Intrinsics) { |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3153 |
// set table address before stub generation which use it |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3154 |
StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table; |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3155 |
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3156 |
} |
1 | 3157 |
} |
3158 |
||
3159 |
||
3160 |
void generate_all() { |
|
3161 |
// Generates all stubs and initializes the entry points |
|
3162 |
||
3163 |
// These entry points require SharedInfo::stack0 to be set up in non-core builds |
|
3164 |
// and need to be relocatable, so they each fabricate a RuntimeStub internally. |
|
10545 | 3165 |
StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError)); |
3166 |
StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError)); |
|
3167 |
StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call)); |
|
1 | 3168 |
|
3169 |
//------------------------------------------------------------------------------------------------------------------------ |
|
3170 |
// entry points that are platform specific |
|
3171 |
||
3172 |
// support for verify_oop (must happen after universe_init) |
|
3173 |
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); |
|
3174 |
||
3175 |
// arraycopy stubs used by compilers |
|
3176 |
generate_arraycopy_stubs(); |
|
2534 | 3177 |
|
4645
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
3178 |
generate_math_stubs(); |
14132 | 3179 |
|
3180 |
// don't bother generating these AES intrinsic stubs unless global flag is set |
|
3181 |
if (UseAESIntrinsics) { |
|
3182 |
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others |
|
3183 |
||
3184 |
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); |
|
3185 |
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); |
|
3186 |
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); |
|
3187 |
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); |
|
3188 |
} |
|
18740 | 3189 |
|
31404
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3190 |
// Generate GHASH intrinsics code |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3191 |
if (UseGHASHIntrinsics) { |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3192 |
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3193 |
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3194 |
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3195 |
} |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3196 |
|
18740 | 3197 |
// Safefetch stubs. |
3198 |
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, |
|
3199 |
&StubRoutines::_safefetch32_fault_pc, |
|
3200 |
&StubRoutines::_safefetch32_continuation_pc); |
|
3201 |
StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry; |
|
3202 |
StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc; |
|
3203 |
StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc; |
|
1 | 3204 |
} |
3205 |
||
3206 |
||
3207 |
public: |
|
3208 |
// Runs stub generation into 'code' as part of construction:
// generate_initial() when 'all' is false, generate_all() when it is true.
StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
  if (!all) {
    generate_initial();
  } else {
    generate_all();
  }
}
|
3215 |
}; // end class declaration |
|
3216 |
||
3217 |
||
3218 |
// Entry point for stub generation: constructing a StubGenerator performs the
// generation (initial set when 'all' is false, remaining set when true).
void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator generator(code, all);
}