author | fparain |
Fri, 11 Dec 2015 09:07:07 -0800 | |
changeset 35071 | a0910b1d3e0d |
parent 33465 | 6063f28a6efb |
child 35135 | dd2ce9021031 |
permissions | -rw-r--r-- |
1 | 1 |
/*
 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
|
24 |
||
7397 | 25 |
#include "precompiled.hpp" |
14626
0cf4eccf130f
8003240: x86: move MacroAssembler into separate file
twisti
parents:
14132
diff
changeset
|
26 |
#include "asm/macroAssembler.hpp" |
0cf4eccf130f
8003240: x86: move MacroAssembler into separate file
twisti
parents:
14132
diff
changeset
|
27 |
#include "asm/macroAssembler.inline.hpp" |
7397 | 28 |
#include "interpreter/interpreter.hpp" |
29 |
#include "nativeInst_x86.hpp" |
|
30 |
#include "oops/instanceOop.hpp" |
|
13728
882756847a04
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
13391
diff
changeset
|
31 |
#include "oops/method.hpp" |
7397 | 32 |
#include "oops/objArrayKlass.hpp" |
33 |
#include "oops/oop.inline.hpp" |
|
34 |
#include "prims/methodHandles.hpp" |
|
35 |
#include "runtime/frame.inline.hpp" |
|
36 |
#include "runtime/handles.inline.hpp" |
|
37 |
#include "runtime/sharedRuntime.hpp" |
|
38 |
#include "runtime/stubCodeGenerator.hpp" |
|
39 |
#include "runtime/stubRoutines.hpp" |
|
14583
d70ee55535f4
8003935: Simplify the needed includes for using Thread::current()
stefank
parents:
14132
diff
changeset
|
40 |
#include "runtime/thread.inline.hpp" |
7397 | 41 |
#include "utilities/top.hpp" |
42 |
#ifdef COMPILER2 |
|
43 |
#include "opto/runtime.hpp" |
|
44 |
#endif |
|
1 | 45 |
|
46 |
// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp
|
49 |
||
50 |
// Shorthand used by the stub generators below: `__ insn(...)` calls insn()
// on the current _masm.
#define __ _masm->
// Same, but the call is made through an Assembler* view of _masm.
#define a__ ((Assembler*)_masm)->

// BLOCK_COMMENT annotates the generated code; it expands to nothing in
// PRODUCT builds.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

// Binds a label and records the label's own name as a block comment.
// Note: the comment text is derived from the identifier passed in.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

const int MXCSR_MASK  = 0xFFC0;  // Mask out any pending exceptions
const int FPU_CNTRL_WRD_MASK = 0xFFFF;
|
63 |
||
64 |
// -------------------------------------------------------------------------------------------------------------------------
// Stub Code definitions
|
66 |
||
67 |
// Runtime helper invoked from the handler_for_unsafe_access stub.
// Reads the pc saved on the current JavaThread, marks a pending
// unsafe-access error on that thread, and returns the address of the
// instruction that follows the saved pc.
static address handle_unsafe_access() {
  JavaThread* thread = JavaThread::current();
  address pc  = thread->saved_exception_pc();
  // pc is the instruction which we must emulate
  // doing a no-op is fine:  return garbage from the load
  // therefore, compute npc
  address npc = Assembler::locate_next_instruction(pc);

  // request an async exception
  thread->set_pending_unsafe_access_error();

  // return address of next instruction to execute
  return npc;
}
|
81 |
||
82 |
class StubGenerator: public StubCodeGenerator { |
|
83 |
private: |
|
84 |
||
85 |
#ifdef PRODUCT |
|
18073
f02460441ddc
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
17622
diff
changeset
|
86 |
#define inc_counter_np(counter) ((void)0) |
1 | 87 |
#else |
88 |
void inc_counter_np_(int& counter) { |
|
1066 | 89 |
__ incrementl(ExternalAddress((address)&counter)); |
1 | 90 |
} |
91 |
#define inc_counter_np(counter) \ |
|
92 |
BLOCK_COMMENT("inc_counter " #counter); \ |
|
93 |
inc_counter_np_(counter); |
|
94 |
#endif //PRODUCT |
|
95 |
||
96 |
// Emits an increment of the SharedRuntime array-copy counter that matches
// element type t.  Does nothing in PRODUCT builds.  In non-PRODUCT builds
// any type other than the five listed ends in ShouldNotReachHere().
void inc_copy_counter_np(BasicType t) {
#ifndef PRODUCT
  switch (t) {
  case T_BYTE:    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); return;
  case T_SHORT:   inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); return;
  case T_INT:     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); return;
  case T_LONG:    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); return;
  case T_OBJECT:  inc_counter_np(SharedRuntime::_oop_array_copy_ctr); return;
  }
  ShouldNotReachHere();
#endif //PRODUCT
}
|
108 |
||
109 |
//------------------------------------------------------------------------------------------------------------------------
// Call stubs are used to call Java from C
//
//    [ return_from_Java     ] <--- rsp
//    [ argument word n      ]
//      ...
// -N [ argument word 1      ]
// -7 [ Possible padding for stack alignment ]
// -6 [ Possible padding for stack alignment ]
// -5 [ Possible padding for stack alignment ]
// -4 [ mxcsr save           ] <--- rsp_after_call
// -3 [ saved rbx            ]
// -2 [ saved rsi            ]
// -1 [ saved rdi            ]
//  0 [ saved rbp            ] <--- rbp
//  1 [ return address       ]
//  2 [ ptr. to call wrapper ]
//  3 [ result               ]
//  4 [ result_type          ]
//  5 [ method               ]
//  6 [ entry_point          ]
//  7 [ parameters           ]
//  8 [ parameter_size       ]
//  9 [ thread               ]
|
133 |
||
134 |
||
135 |
// Generates the call stub used to enter Java code from C.
//
// return_address (out): set to the pc immediately following the emitted
//                       call to the Java entry point.
// Returns the start address of the generated stub.
//
// All frame slots below are addressed relative to rbp after enter().
address generate_call_stub(address& return_address) {
  StubCodeMark mark(this, "StubRoutines", "call_stub");
  address start = __ pc();

  // stub code parameters / addresses
  assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code");
  const bool    sse_save = UseSSE > 0;           // mxcsr is only saved/restored when SSE is in use
  const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_catch_exception()!
  const int     locals_count_in_bytes  (4*wordSize);
  const Address mxcsr_save    (rbp, -4 * wordSize);
  const Address saved_rbx     (rbp, -3 * wordSize);
  const Address saved_rsi     (rbp, -2 * wordSize);
  const Address saved_rdi     (rbp, -1 * wordSize);
  const Address result        (rbp,  3 * wordSize);
  const Address result_type   (rbp,  4 * wordSize);
  const Address method        (rbp,  5 * wordSize);
  const Address entry_point   (rbp,  6 * wordSize);
  const Address parameters    (rbp,  7 * wordSize);
  const Address parameter_size(rbp,  8 * wordSize);
  const Address thread        (rbp,  9 * wordSize); // same as in generate_catch_exception()!

  // stub code
  __ enter();
  __ movptr(rcx, parameter_size);              // parameter counter
  __ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes
  __ addptr(rcx, locals_count_in_bytes);       // reserve space for register saves
  __ subptr(rsp, rcx);
  __ andptr(rsp, -(StackAlignmentInBytes));    // Align stack

  // save rdi, rsi, & rbx, according to C calling conventions
  __ movptr(saved_rdi, rdi);
  __ movptr(saved_rsi, rsi);
  __ movptr(saved_rbx, rbx);

  // provide initial value for required masks
  // (rbx has already been saved above, so it is free to use as a temp)
  if (UseAVX > 2) {
    __ movl(rbx, 0xffff);
    __ kmovdl(k1, rbx);
  }

  // save and initialize %mxcsr
  if (sse_save) {
    Label skip_ldmx;
    __ stmxcsr(mxcsr_save);
    __ movl(rax, mxcsr_save);
    __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
    ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
    __ cmp32(rax, mxcsr_std);
    __ jcc(Assembler::equal, skip_ldmx);
    __ ldmxcsr(mxcsr_std);
    __ bind(skip_ldmx);
  }

  // make sure the control word is correct.
  __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));

#ifdef ASSERT
  // make sure we have no pending exceptions
  { Label L;
    __ movptr(rcx, thread);
    __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
    __ jcc(Assembler::equal, L);
    __ stop("StubRoutines::call_stub: entered with pending exception");
    __ bind(L);
  }
#endif

  // pass parameters if any
  BLOCK_COMMENT("pass parameters if any");
  Label parameters_done;
  __ movl(rcx, parameter_size);  // parameter counter
  __ testl(rcx, rcx);
  __ jcc(Assembler::zero, parameters_done);

  // parameter passing loop

  Label loop;
  // Copy Java parameters in reverse order (receiver last)
  // Note that the argument order is inverted in the process
  // source is rdx[rcx: N-1..0]
  // dest   is rsp[rbx: 0..N-1]

  __ movptr(rdx, parameters);          // parameter pointer
  __ xorptr(rbx, rbx);

  __ BIND(loop);

  // get parameter
  __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize));
  __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
                  Interpreter::expr_offset_in_bytes(0)), rax);          // store parameter
  __ increment(rbx);
  __ decrement(rcx);
  __ jcc(Assembler::notZero, loop);

  // call Java function
  __ BIND(parameters_done);
  __ movptr(rbx, method);           // get Method*
  __ movptr(rax, entry_point);      // get entry_point
  __ mov(rsi, rsp);                 // set sender sp
  BLOCK_COMMENT("call Java function");
  __ call(rax);

  BLOCK_COMMENT("call_stub_return_address:");
  return_address = __ pc();

#ifdef COMPILER2
  {
    Label L_skip;
    if (UseSSE >= 2) {
      __ verify_FPU(0, "call_stub_return");
    } else {
      for (int i = 1; i < 8; i++) {
        __ ffree(i);
      }

      // UseSSE <= 1 so double result should be left on TOS
      __ movl(rsi, result_type);
      __ cmpl(rsi, T_DOUBLE);
      __ jcc(Assembler::equal, L_skip);
      if (UseSSE == 0) {
        // UseSSE == 0 so float result should be left on TOS
        __ cmpl(rsi, T_FLOAT);
        __ jcc(Assembler::equal, L_skip);
      }
      __ ffree(0);
    }
    __ BIND(L_skip);
  }
#endif // COMPILER2

  // store result depending on type
  // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
  __ movptr(rdi, result);
  Label is_long, is_float, is_double, exit;
  __ movl(rsi, result_type);
  __ cmpl(rsi, T_LONG);
  __ jcc(Assembler::equal, is_long);
  __ cmpl(rsi, T_FLOAT);
  __ jcc(Assembler::equal, is_float);
  __ cmpl(rsi, T_DOUBLE);
  __ jcc(Assembler::equal, is_double);

  // handle T_INT case
  __ movl(Address(rdi, 0), rax);
  __ BIND(exit);

  // check that FPU stack is empty
  __ verify_FPU(0, "generate_call_stub");

  // pop parameters
  __ lea(rsp, rsp_after_call);

  // restore %mxcsr
  if (sse_save) {
    __ ldmxcsr(mxcsr_save);
  }

  // restore rdi, rsi and rbx
  __ movptr(rbx, saved_rbx);
  __ movptr(rsi, saved_rsi);
  __ movptr(rdi, saved_rdi);
  __ addptr(rsp, locals_count_in_bytes);   // release the register save area

  // return
  __ pop(rbp);
  __ ret(0);

  // handle return types different from T_INT
  __ BIND(is_long);
  __ movl(Address(rdi, 0 * wordSize), rax);
  __ movl(Address(rdi, 1 * wordSize), rdx);
  __ jmp(exit);

  __ BIND(is_float);
  // interpreter uses xmm0 for return values
  if (UseSSE >= 1) {
    __ movflt(Address(rdi, 0), xmm0);
  } else {
    __ fstp_s(Address(rdi, 0));
  }
  __ jmp(exit);

  __ BIND(is_double);
  // interpreter uses xmm0 for return values
  if (UseSSE >= 2) {
    __ movdbl(Address(rdi, 0), xmm0);
  } else {
    __ fstp_d(Address(rdi, 0));
  }
  __ jmp(exit);

  return start;
}
|
330 |
||
331 |
||
332 |
//------------------------------------------------------------------------------------------------------------------------
// Return point for a Java call if there's an exception thrown in Java code.
// The exception is caught and transformed into a pending exception stored in
// JavaThread that can be tested from within the VM.
//
// Note: Usually the parameters are removed by the callee. In case of an exception
//       crossing an activation frame boundary, that is not the case if the callee
//       is compiled code => need to setup the rsp.
//
// rax: exception oop
|
342 |
||
343 |
// Generates the catch_exception stub.  The emitted code stores the oop in
// rax as the thread's pending exception, records this file and line as the
// exception's origin, and jumps to StubRoutines::_call_stub_return_address.
// Returns the start address of the generated stub.
address generate_catch_exception() {
  StubCodeMark mark(this, "StubRoutines", "catch_exception");
  const Address thread        (rbp,  9 * wordSize); // same as in generate_call_stub()!
  address start = __ pc();

  // get thread directly
  __ movptr(rcx, thread);
#ifdef ASSERT
  // verify that threads correspond
  { Label L;
    __ get_thread(rbx);
    __ cmpptr(rbx, rcx);
    __ jcc(Assembler::equal, L);
    __ stop("StubRoutines::catch_exception: threads must correspond");
    __ bind(L);
  }
#endif
  // set pending exception
  __ verify_oop(rax);
  __ movptr(Address(rcx, Thread::pending_exception_offset()), rax);
  __ lea(Address(rcx, Thread::exception_file_offset()),
         ExternalAddress((address)__FILE__));
  __ movl(Address(rcx, Thread::exception_line_offset()), __LINE__);
  // complete return to VM
  assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
  __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));

  return start;
}
|
373 |
||
374 |
||
375 |
//------------------------------------------------------------------------------------------------------------------------
// Continuation point for runtime calls returning with a pending exception.
// The pending exception check happened in the runtime or native call stub.
// The pending exception in Thread is converted into a Java-level exception.
//
// Contract with Java-level exception handlers:
// rax: exception
// rdx: throwing pc
//
// NOTE: At entry of this stub, exception-pc must be on stack !!
|
385 |
||
386 |
// Generates the forward-exception stub.  The emitted code looks up the
// handler for the return address on top of the stack, moves the thread's
// pending exception into rax (clearing it in the thread), pops the return
// address into rdx, and jumps to the handler.
// Returns the start address of the generated stub.
address generate_forward_exception() {
  StubCodeMark mark(this, "StubRoutines", "forward exception");
  address start = __ pc();
  const Register thread = rcx;

  // other registers used in this stub
  const Register exception_oop = rax;
  const Register handler_addr  = rbx;
  const Register exception_pc  = rdx;

  // Upon entry, the sp points to the return address returning into Java
  // (interpreted or compiled) code; i.e., the return address becomes the
  // throwing pc.
  //
  // Arguments pushed before the runtime call are still on the stack but
  // the exception handler will reset the stack pointer -> ignore them.
  // A potential result in registers can be ignored as well.

#ifdef ASSERT
  // make sure this code is only executed if there is a pending exception
  { Label L;
    __ get_thread(thread);
    __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
    __ jcc(Assembler::notEqual, L);
    __ stop("StubRoutines::forward exception: no pending exception (1)");
    __ bind(L);
  }
#endif

  // compute exception handler into rbx
  __ get_thread(thread);
  __ movptr(exception_pc, Address(rsp, 0));
  BLOCK_COMMENT("call exception_handler_for_return_address");
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc);
  __ mov(handler_addr, rax);

  // setup rax & rdx, remove return address & clear pending exception
  // (the thread and throwing pc are loaded again after the leaf call)
  __ get_thread(thread);
  __ pop(exception_pc);
  __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
  __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);

#ifdef ASSERT
  // make sure exception is set
  { Label L;
    __ testptr(exception_oop, exception_oop);
    __ jcc(Assembler::notEqual, L);
    __ stop("StubRoutines::forward exception: no pending exception (2)");
    __ bind(L);
  }
#endif

  // Verify that there is really a valid exception in RAX.
  __ verify_oop(exception_oop);

  // continue at exception handler (return address removed)
  // rax: exception
  // rbx: exception handler
  // rdx: throwing pc
  __ jmp(handler_addr);

  return start;
}
|
449 |
||
450 |
||
451 |
//----------------------------------------------------------------------------------------------------
// Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest)
//
// xchg exists as far back as 8086, lock needed for MP only
// Stack layout immediately after call:
//
// 0 [ret addr ] <--- rsp
// 1 [  ex     ]
// 2 [  dest   ]
//
// Result:   *dest <- ex, return (old *dest)
//
// Note: win32 does not currently use this code
|
464 |
||
465 |
// Generates the atomic_xchg stub: exchanges the 32-bit value at *dest with
// the `ex` argument and leaves the old value in rax.
// Returns the start address of the generated stub.
address generate_atomic_xchg() {
  StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
  address start = __ pc();

  __ push(rdx);
  // rdx was just pushed, so both stack arguments sit one word further
  // from rsp than at stub entry.
  Address exchange(rsp, 2 * wordSize);
  Address dest_addr(rsp, 3 * wordSize);
  __ movl(rax, exchange);
  __ movptr(rdx, dest_addr);
  __ xchgl(rax, Address(rdx, 0));
  __ pop(rdx);
  __ ret(0);

  return start;
}
|
480 |
||
481 |
//----------------------------------------------------------------------------------------------------
// Support for void verify_mxcsr()
//
// This routine is used with -Xcheck:jni to verify that native
// JNI code does not return to Java code without restoring the
// MXCSR register to our expected state.
|
487 |
||
488 |
||
489 |
// Generates the verify_mxcsr stub.  When CheckJNICalls is set and SSE is in
// use, the emitted code compares the masked MXCSR against the standard
// value, warns on a mismatch and reloads the standard value.  Otherwise the
// stub consists of a bare return.
// Returns the start address of the generated stub.
address generate_verify_mxcsr() {
  StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
  address start = __ pc();

  const Address mxcsr_save(rsp, 0);

  if (CheckJNICalls && UseSSE > 0 ) {
    Label ok_ret;
    ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
    __ push(rax);
    __ subptr(rsp, wordSize);      // allocate a temp location
    __ stmxcsr(mxcsr_save);
    __ movl(rax, mxcsr_save);
    __ andl(rax, MXCSR_MASK);
    __ cmp32(rax, mxcsr_std);
    __ jcc(Assembler::equal, ok_ret);

    __ warn("MXCSR changed by native JNI code.");

    __ ldmxcsr(mxcsr_std);

    __ bind(ok_ret);
    __ addptr(rsp, wordSize);      // release the temp location
    __ pop(rax);
  }

  __ ret(0);

  return start;
}
|
519 |
||
520 |
||
521 |
//---------------------------------------------------------------------------
// Support for void verify_fpu_cntrl_wrd()
//
// This routine is used with -Xcheck:jni to verify that native
// JNI code does not return to Java code without restoring the
// FP control word to our expected state.
|
527 |
||
528 |
// Generates the FP-control-word verification stub (registered under the
// name "verify_spcw").  When CheckJNICalls is set, the emitted code compares
// the masked control word against the standard value, warns on a mismatch
// and reloads the standard value.  Otherwise the stub is a bare return.
// Returns the start address of the generated stub.
address generate_verify_fpu_cntrl_wrd() {
  StubCodeMark mark(this, "StubRoutines", "verify_spcw");
  address start = __ pc();

  const Address fpu_cntrl_wrd_save(rsp, 0);

  if (CheckJNICalls) {
    Label ok_ret;
    __ push(rax);
    __ subptr(rsp, wordSize);      // allocate a temp location
    __ fnstcw(fpu_cntrl_wrd_save);
    __ movl(rax, fpu_cntrl_wrd_save);
    __ andl(rax, FPU_CNTRL_WRD_MASK);
    ExternalAddress fpu_std(StubRoutines::addr_fpu_cntrl_wrd_std());
    __ cmp32(rax, fpu_std);
    __ jcc(Assembler::equal, ok_ret);

    __ warn("Floating point control word changed by native JNI code.");

    __ fldcw(fpu_std);

    __ bind(ok_ret);
    __ addptr(rsp, wordSize);      // release the temp location
    __ pop(rax);
  }

  __ ret(0);

  return start;
}
|
558 |
||
559 |
//---------------------------------------------------------------------------
// Wrapper for slow-case handling of double-to-integer conversion
// d2i or f2i fast case failed either because it is nan or because
// of under/overflow.
// Input:  FPU TOS: float value
// Output: rax (rdx): integer (long) result
|
565 |
||
566 |
// Generates a wrapper that calls the C conversion routine `fcn` with the
// double taken from the FPU top of stack, preserving rbx, rcx, rsi, rdi,
// rbp and the FPU state around the call.
//
// t   - T_INT or T_LONG; only selects the block comment that is emitted
// fcn - address of the C routine to call
// Returns the start address of the generated stub.
address generate_d2i_wrapper(BasicType t, address fcn) {
  StubCodeMark mark(this, "StubRoutines", "d2i_wrapper");
  address start = __ pc();

  // Capture info about frame layout
  // (word offsets from rsp once push_FPU_state() has run)
  enum layout { FPUState_off         = 0,
                rbp_off              = FPUStateSizeInWords,
                rdi_off,
                rsi_off,
                rcx_off,
                rbx_off,
                saved_argument_off,
                saved_argument_off2, // 2nd half of double
                framesize
  };

  assert(FPUStateSizeInWords == 27, "update stack layout");

  // Save outgoing argument to stack across push_FPU_state()
  __ subptr(rsp, wordSize * 2);
  __ fstp_d(Address(rsp, 0));

  // Save CPU & FPU state
  __ push(rbx);
  __ push(rcx);
  __ push(rsi);
  __ push(rdi);
  __ push(rbp);
  __ push_FPU_state();

  // push_FPU_state() resets the FP top of stack
  // Load original double into FP top of stack
  __ fld_d(Address(rsp, saved_argument_off * wordSize));
  // Store double into stack as outgoing argument
  __ subptr(rsp, wordSize*2);
  __ fst_d(Address(rsp, 0));

  // Prepare FPU for doing math in C-land
  __ empty_FPU_stack();
  // Call the C code to massage the double.  Result in EAX
  if (t == T_INT)
    { BLOCK_COMMENT("SharedRuntime::d2i"); }
  else if (t == T_LONG)
    { BLOCK_COMMENT("SharedRuntime::d2l"); }
  __ call_VM_leaf( fcn, 2 );

  // Restore CPU & FPU state
  __ pop_FPU_state();
  __ pop(rbp);
  __ pop(rdi);
  __ pop(rsi);
  __ pop(rcx);
  __ pop(rbx);
  __ addptr(rsp, wordSize * 2);   // drop the two-word slot holding the saved argument

  __ ret(0);

  return start;
}
|
625 |
||
626 |
||
627 |
//---------------------------------------------------------------------------
// The following routine generates a subroutine to throw an asynchronous
// UnknownError when an unsafe access gets a fault that could not be
// reasonably prevented by the programmer.  (Example: SIGBUS/OBJERR.)
|
631 |
// Generates the handler_for_unsafe_access stub.  The emitted code calls
// handle_unsafe_access() with all registers saved, writes the address it
// returns into a stack slot reserved beforehand, and "returns" to that
// address.
// Returns the start address of the generated stub.
address generate_handler_for_unsafe_access() {
  StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
  address start = __ pc();

  __ push(0);                       // hole for return address-to-be
  __ pusha();                       // push registers
  // The hole is addressed as the slot just above the registers saved by pusha.
  Address next_pc(rsp, RegisterImpl::number_of_registers * BytesPerWord);
  BLOCK_COMMENT("call handle_unsafe_access");
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access)));
  __ movptr(next_pc, rax);          // stuff next address
  __ popa();
  __ ret(0);                        // jump to next address

  return start;
}
|
646 |
||
647 |
||
648 |
//----------------------------------------------------------------------------------------------------
// Non-destructive plausibility checks for oops
|
650 |
||
651 |
// Generates the verify_oop stub.  The emitted code accepts a NULL oop,
// otherwise checks the oop against Universe's verify mask/bits and checks
// that its klass word is non-zero.  On failure it calls
// MacroAssembler::debug32.  Both paths restore rax and rdx and pop the three
// argument words on return.
// Returns the start address of the generated stub.
address generate_verify_oop() {
  StubCodeMark mark(this, "StubRoutines", "verify_oop");
  address start = __ pc();

  // Incoming arguments on stack after saving rax:
  //
  // [tos    ]: saved rdx
  // [tos + 1]: saved EFLAGS
  // [tos + 2]: return address
  // [tos + 3]: char* error message
  // [tos + 4]: oop   object to verify
  // [tos + 5]: saved rax - saved by caller and bashed

  Label exit, error;
  __ pushf();
  __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
  __ push(rdx);                                // save rdx
  // make sure object is 'reasonable'
  __ movptr(rax, Address(rsp, 4 * wordSize));    // get object
  __ testptr(rax, rax);
  __ jcc(Assembler::zero, exit);               // if obj is NULL it is ok

  // Check if the oop is in the right area of memory
  const int oop_mask = Universe::verify_oop_mask();
  const int oop_bits = Universe::verify_oop_bits();
  __ mov(rdx, rax);
  __ andptr(rdx, oop_mask);
  __ cmpptr(rdx, oop_bits);
  __ jcc(Assembler::notZero, error);

  // make sure klass is 'reasonable', which is not zero.
  __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
  __ testptr(rax, rax);
  __ jcc(Assembler::zero, error);              // if klass is NULL it is broken

  // return if everything seems ok
  __ bind(exit);
  __ movptr(rax, Address(rsp, 5 * wordSize));    // get saved rax back
  __ pop(rdx);                                 // restore rdx
  __ popf();                                   // restore EFLAGS
  __ ret(3 * wordSize);                        // pop arguments

  // handle errors
  __ bind(error);
  __ movptr(rax, Address(rsp, 5 * wordSize));    // get saved rax back
  __ pop(rdx);                                 // get saved rdx back
  __ popf();                                   // get saved EFLAGS off stack -- will be ignored
  __ pusha();                                  // push registers (eip = return address & msg are already pushed)
  BLOCK_COMMENT("call MacroAssembler::debug");
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  __ popa();
  __ ret(3 * wordSize);                        // pop arguments
  return start;
}
|
705 |
||
706 |
// |
|
707 |
// Generate pre-barrier for array stores |
|
708 |
// |
|
709 |
// Input: |
|
710 |
// start - starting address |
|
3262
30d1c247fc25
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
2534
diff
changeset
|
711 |
// count - element count |
8498 | 712 |
void gen_write_ref_array_pre_barrier(Register start, Register count, bool uninitialized_target) { |
1 | 713 |
assert_different_registers(start, count); |
714 |
BarrierSet* bs = Universe::heap()->barrier_set(); |
|
715 |
switch (bs->kind()) { |
|
716 |
case BarrierSet::G1SATBCTLogging: |
|
8498 | 717 |
// With G1, don't generate the call if we statically know that the target in uninitialized |
718 |
if (!uninitialized_target) { |
|
719 |
__ pusha(); // push registers |
|
720 |
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), |
|
721 |
start, count); |
|
722 |
__ popa(); |
|
723 |
} |
|
1 | 724 |
break; |
32596
8feecdee3156
8072817: CardTableExtension kind() should be BarrierSet::CardTableExtension
kbarrett
parents:
31771
diff
changeset
|
725 |
case BarrierSet::CardTableForRS: |
1 | 726 |
case BarrierSet::CardTableExtension: |
727 |
case BarrierSet::ModRef: |
|
728 |
break; |
|
729 |
default : |
|
730 |
ShouldNotReachHere(); |
|
731 |
||
732 |
} |
|
733 |
} |
|
734 |
||
735 |
||
736 |
// |
|
737 |
// Generate a post-barrier for an array store |
|
738 |
// |
|
739 |
// start - starting address |
|
740 |
// count - element count |
|
741 |
// |
|
742 |
// The two input registers are overwritten. |
|
743 |
// |
|
744 |
void gen_write_ref_array_post_barrier(Register start, Register count) { |
|
745 |
BarrierSet* bs = Universe::heap()->barrier_set(); |
|
746 |
assert_different_registers(start, count); |
|
747 |
switch (bs->kind()) { |
|
748 |
case BarrierSet::G1SATBCTLogging: |
|
749 |
{ |
|
1066 | 750 |
__ pusha(); // push registers |
4740
d708800308b7
6918006: G1: spill space must be reserved on the stack for barrier calls on Windows x64
apetrusenko
parents:
4645
diff
changeset
|
751 |
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), |
d708800308b7
6918006: G1: spill space must be reserved on the stack for barrier calls on Windows x64
apetrusenko
parents:
4645
diff
changeset
|
752 |
start, count); |
1066 | 753 |
__ popa(); |
1 | 754 |
} |
755 |
break; |
|
756 |
||
32596
8feecdee3156
8072817: CardTableExtension kind() should be BarrierSet::CardTableExtension
kbarrett
parents:
31771
diff
changeset
|
757 |
case BarrierSet::CardTableForRS: |
1 | 758 |
case BarrierSet::CardTableExtension: |
759 |
{ |
|
29325 | 760 |
CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs); |
1 | 761 |
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); |
762 |
||
763 |
Label L_loop; |
|
764 |
const Register end = count; // elements count; end == start+count-1 |
|
765 |
assert_different_registers(start, end); |
|
766 |
||
1066 | 767 |
__ lea(end, Address(start, count, Address::times_ptr, -wordSize)); |
768 |
__ shrptr(start, CardTableModRefBS::card_shift); |
|
769 |
__ shrptr(end, CardTableModRefBS::card_shift); |
|
770 |
__ subptr(end, start); // end --> count |
|
1 | 771 |
__ BIND(L_loop); |
957
386f9fbd4cb3
6717457: Internal Error (src/share/vm/code/relocInfo.hpp:1089)
never
parents:
192
diff
changeset
|
772 |
intptr_t disp = (intptr_t) ct->byte_map_base; |
386f9fbd4cb3
6717457: Internal Error (src/share/vm/code/relocInfo.hpp:1089)
never
parents:
192
diff
changeset
|
773 |
Address cardtable(start, count, Address::times_1, disp); |
386f9fbd4cb3
6717457: Internal Error (src/share/vm/code/relocInfo.hpp:1089)
never
parents:
192
diff
changeset
|
774 |
__ movb(cardtable, 0); |
1 | 775 |
__ decrement(count); |
776 |
__ jcc(Assembler::greaterEqual, L_loop); |
|
777 |
} |
|
778 |
break; |
|
779 |
case BarrierSet::ModRef: |
|
780 |
break; |
|
781 |
default : |
|
782 |
ShouldNotReachHere(); |
|
783 |
||
784 |
} |
|
785 |
} |
|
786 |
||
1437 | 787 |
|
788 |
// Copy 64 bytes chunks |
|
789 |
// |
|
790 |
// Inputs: |
|
791 |
// from - source array address |
|
792 |
// to_from - destination array address - from |
|
793 |
// qword_count - 8-bytes element count, negative |
|
794 |
// |
|
795 |
void xmm_copy_forward(Register from, Register to_from, Register qword_count) { |
|
796 |
assert( UseSSE >= 2, "supported cpu only" ); |
|
797 |
Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit; |
|
32727
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
798 |
if (UseAVX > 2) { |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
799 |
__ push(rbx); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
800 |
__ movl(rbx, 0xffff); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
801 |
__ kmovdl(k1, rbx); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
802 |
__ pop(rbx); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
803 |
} |
1437 | 804 |
// Copy 64-byte chunks |
805 |
__ jmpb(L_copy_64_bytes); |
|
5249 | 806 |
__ align(OptoLoopAlignment); |
1437 | 807 |
__ BIND(L_copy_64_bytes_loop); |
808 |
||
15115
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
809 |
if (UseUnalignedLoadStores) { |
30624 | 810 |
if (UseAVX > 2) { |
32727
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
811 |
__ evmovdqul(xmm0, Address(from, 0), Assembler::AVX_512bit); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
812 |
__ evmovdqul(Address(from, to_from, Address::times_1, 0), xmm0, Assembler::AVX_512bit); |
30624 | 813 |
} else if (UseAVX == 2) { |
15115
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
814 |
__ vmovdqu(xmm0, Address(from, 0)); |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
815 |
__ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0); |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
816 |
__ vmovdqu(xmm1, Address(from, 32)); |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
817 |
__ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1); |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
818 |
} else { |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
819 |
__ movdqu(xmm0, Address(from, 0)); |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
820 |
__ movdqu(Address(from, to_from, Address::times_1, 0), xmm0); |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
821 |
__ movdqu(xmm1, Address(from, 16)); |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
822 |
__ movdqu(Address(from, to_from, Address::times_1, 16), xmm1); |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
823 |
__ movdqu(xmm2, Address(from, 32)); |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
824 |
__ movdqu(Address(from, to_from, Address::times_1, 32), xmm2); |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
825 |
__ movdqu(xmm3, Address(from, 48)); |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
826 |
__ movdqu(Address(from, to_from, Address::times_1, 48), xmm3); |
f8ef87f6f07f
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
14834
diff
changeset
|
827 |
} |
1437 | 828 |
} else { |
829 |
__ movq(xmm0, Address(from, 0)); |
|
830 |
__ movq(Address(from, to_from, Address::times_1, 0), xmm0); |
|
831 |
__ movq(xmm1, Address(from, 8)); |
|
832 |
__ movq(Address(from, to_from, Address::times_1, 8), xmm1); |
|
833 |
__ movq(xmm2, Address(from, 16)); |
|
834 |
__ movq(Address(from, to_from, Address::times_1, 16), xmm2); |
|
835 |
__ movq(xmm3, Address(from, 24)); |
|
836 |
__ movq(Address(from, to_from, Address::times_1, 24), xmm3); |
|
837 |
__ movq(xmm4, Address(from, 32)); |
|
838 |
__ movq(Address(from, to_from, Address::times_1, 32), xmm4); |
|
839 |
__ movq(xmm5, Address(from, 40)); |
|
840 |
__ movq(Address(from, to_from, Address::times_1, 40), xmm5); |
|
841 |
__ movq(xmm6, Address(from, 48)); |
|
842 |
__ movq(Address(from, to_from, Address::times_1, 48), xmm6); |
|
843 |
__ movq(xmm7, Address(from, 56)); |
|
844 |
__ movq(Address(from, to_from, Address::times_1, 56), xmm7); |
|
845 |
} |
|
846 |
||
847 |
__ addl(from, 64); |
|
848 |
__ BIND(L_copy_64_bytes); |
|
849 |
__ subl(qword_count, 8); |
|
850 |
__ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop); |
|
16624
9dbd4b210bf9
8011102: Clear AVX registers after return from JNI call
kvn
parents:
15115
diff
changeset
|
851 |
|
30624 | 852 |
if (UseUnalignedLoadStores && (UseAVX == 2)) { |
16624
9dbd4b210bf9
8011102: Clear AVX registers after return from JNI call
kvn
parents:
15115
diff
changeset
|
853 |
// clean upper bits of YMM registers |
30299 | 854 |
__ vpxor(xmm0, xmm0); |
855 |
__ vpxor(xmm1, xmm1); |
|
16624
9dbd4b210bf9
8011102: Clear AVX registers after return from JNI call
kvn
parents:
15115
diff
changeset
|
856 |
} |
1437 | 857 |
__ addl(qword_count, 8); |
858 |
__ jccb(Assembler::zero, L_exit); |
|
859 |
// |
|
860 |
// length is too short, just copy qwords |
|
861 |
// |
|
862 |
__ BIND(L_copy_8_bytes); |
|
863 |
__ movq(xmm0, Address(from, 0)); |
|
864 |
__ movq(Address(from, to_from, Address::times_1), xmm0); |
|
865 |
__ addl(from, 8); |
|
866 |
__ decrement(qword_count); |
|
867 |
__ jcc(Assembler::greater, L_copy_8_bytes); |
|
868 |
__ BIND(L_exit); |
|
869 |
} |
|
870 |
||
1 | 871 |
// Copy 64 bytes chunks |
872 |
// |
|
873 |
// Inputs: |
|
874 |
// from - source array address |
|
875 |
// to_from - destination array address - from |
|
876 |
// qword_count - 8-bytes element count, negative |
|
877 |
// |
|
878 |
void mmx_copy_forward(Register from, Register to_from, Register qword_count) { |
|
1437 | 879 |
assert( VM_Version::supports_mmx(), "supported cpu only" ); |
1 | 880 |
Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit; |
881 |
// Copy 64-byte chunks |
|
882 |
__ jmpb(L_copy_64_bytes); |
|
5249 | 883 |
__ align(OptoLoopAlignment); |
1 | 884 |
__ BIND(L_copy_64_bytes_loop); |
885 |
__ movq(mmx0, Address(from, 0)); |
|
886 |
__ movq(mmx1, Address(from, 8)); |
|
887 |
__ movq(mmx2, Address(from, 16)); |
|
888 |
__ movq(Address(from, to_from, Address::times_1, 0), mmx0); |
|
889 |
__ movq(mmx3, Address(from, 24)); |
|
890 |
__ movq(Address(from, to_from, Address::times_1, 8), mmx1); |
|
891 |
__ movq(mmx4, Address(from, 32)); |
|
892 |
__ movq(Address(from, to_from, Address::times_1, 16), mmx2); |
|
893 |
__ movq(mmx5, Address(from, 40)); |
|
894 |
__ movq(Address(from, to_from, Address::times_1, 24), mmx3); |
|
895 |
__ movq(mmx6, Address(from, 48)); |
|
896 |
__ movq(Address(from, to_from, Address::times_1, 32), mmx4); |
|
897 |
__ movq(mmx7, Address(from, 56)); |
|
898 |
__ movq(Address(from, to_from, Address::times_1, 40), mmx5); |
|
899 |
__ movq(Address(from, to_from, Address::times_1, 48), mmx6); |
|
900 |
__ movq(Address(from, to_from, Address::times_1, 56), mmx7); |
|
1066 | 901 |
__ addptr(from, 64); |
1 | 902 |
__ BIND(L_copy_64_bytes); |
903 |
__ subl(qword_count, 8); |
|
904 |
__ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop); |
|
905 |
__ addl(qword_count, 8); |
|
906 |
__ jccb(Assembler::zero, L_exit); |
|
907 |
// |
|
908 |
// length is too short, just copy qwords |
|
909 |
// |
|
910 |
__ BIND(L_copy_8_bytes); |
|
911 |
__ movq(mmx0, Address(from, 0)); |
|
912 |
__ movq(Address(from, to_from, Address::times_1), mmx0); |
|
1066 | 913 |
__ addptr(from, 8); |
1 | 914 |
__ decrement(qword_count); |
915 |
__ jcc(Assembler::greater, L_copy_8_bytes); |
|
916 |
__ BIND(L_exit); |
|
917 |
__ emms(); |
|
918 |
} |
|
919 |
||
920 |
// Generate a forward (low to high address) array copy stub.
//
//   t                 - element type; T_BYTE, T_SHORT and T_OBJECT get
//                       type-specific handling below
//   aligned           - when true, the source alignment prologue is skipped
//   sf                - scale factor of one element
//   entry             - if not NULL, receives the pc just after the arguments
//                       have been loaded from the stack
//   name              - stub name passed to StubCodeMark
//   dest_uninitialized- forwarded to the pre-barrier for T_OBJECT copies
//
// Returns the start address of the generated stub. The stub itself returns
// 0 in rax.
address generate_disjoint_copy(BasicType t, bool aligned,
                               Address::ScaleFactor sf,
                               address* entry, const char *name,
                               bool dest_uninitialized = false) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
  Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes;

  const int  shift            = Address::times_ptr - sf;
  const int  elems_in_4_bytes = 1 << shift;
  const int  elems_in_8_bytes = 2 << shift;
  const bool is_byte          = (t == T_BYTE);
  const bool is_byte_or_short = is_byte || (t == T_SHORT);
  // NOTE: 1 << (shift - 1) is only formed inside byte/short-only code,
  // exactly where the expression was evaluated before.

  const Register from     = rsi;  // source array address
  const Register to       = rdi;  // destination array address
  const Register count    = rcx;  // elements count
  const Register to_from  = to;   // (to - from); same register as 'to'
  const Register saved_to = rdx;  // saved destination array address

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);
  // Arguments sit above the three pushed words and the return address.
  __ movptr(from , Address(rsp, 12+ 4));
  __ movptr(to   , Address(rsp, 12+ 8));
  __ movl(count, Address(rsp, 12+ 12));

  if (entry != NULL) {
    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  if (t == T_OBJECT) {
    // Nothing to do (and no barriers) for an empty copy.
    __ testl(count, count);
    __ jcc(Assembler::zero, L_0_count);
    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
    __ mov(saved_to, to);          // keep 'to' for the post-barrier
  }

  __ subptr(to, from); // to --> to_from
  __ cmpl(count, elems_in_8_bytes); // Short arrays (< 8 bytes) copy by element
  __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp

  if (!UseUnalignedLoadStores && !aligned && is_byte_or_short) {
    // align source address at 4 bytes address boundary
    const int elems_in_2_bytes = 1 << (shift - 1);
    if (is_byte) {
      // One byte misalignment happens only for byte arrays
      __ testl(from, 1);
      __ jccb(Assembler::zero, L_skip_align1);
      __ movb(rax, Address(from, 0));
      __ movb(Address(from, to_from, Address::times_1, 0), rax);
      __ increment(from);
      __ decrement(count);
    __ BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    __ testl(from, 2);
    __ jccb(Assembler::zero, L_skip_align2);
    __ movw(rax, Address(from, 0));
    __ movw(Address(from, to_from, Address::times_1, 0), rax);
    __ addptr(from, 2);
    __ subl(count, elems_in_2_bytes);
  __ BIND(L_skip_align2);
  }

  if (VM_Version::supports_mmx()) {
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      __ testptr(from, 4);
      __ jccb(Assembler::zero, L_copy_64_bytes);
      __ movl(rax, Address(from, 0));
      __ movl(Address(from, to_from, Address::times_1, 0), rax);
      __ addptr(from, 4);
      __ subl(count, elems_in_4_bytes);
    }
  __ BIND(L_copy_64_bytes);
    __ mov(rax, count);
    __ shrl(rax, shift+1);  // 8 bytes chunk count
    //
    // Copy 8-byte chunks, 8 per iteration of the loop, through XMM or
    // MMX registers depending on UseXMMForArrayCopy
    //
    if (UseXMMForArrayCopy) {
      xmm_copy_forward(from, to_from, rax);
    } else {
      mmx_copy_forward(from, to_from, rax);
    }
  } else {
    // No MMX: use rep_mov, which needs the real 'to' in rdi.
    __ mov(rax, count);       // save 'count'
    __ shrl(count, shift);    // element count --> 4-byte chunk count
    __ addptr(to_from, from); // restore 'to'
    __ rep_mov();
    __ subptr(to_from, from); // restore 'to_from'
    __ mov(count, rax);       // restore 'count'
    __ jmpb(L_copy_2_bytes);  // all dwords were copied
  }

  // copy trailing dword
__ BIND(L_copy_4_bytes);
  __ testl(count, elems_in_4_bytes);
  __ jccb(Assembler::zero, L_copy_2_bytes);
  __ movl(rax, Address(from, 0));
  __ movl(Address(from, to_from, Address::times_1, 0), rax);
  if (is_byte_or_short) {
    __ addptr(from, 4);
  __ BIND(L_copy_2_bytes);
    // copy trailing word
    __ testl(count, 1<<(shift-1));
    __ jccb(Assembler::zero, L_copy_byte);
    __ movw(rax, Address(from, 0));
    __ movw(Address(from, to_from, Address::times_1, 0), rax);
    if (is_byte) {
      __ addptr(from, 2);
    __ BIND(L_copy_byte);
      // copy trailing byte
      __ testl(count, 1);
      __ jccb(Assembler::zero, L_exit);
      __ movb(rax, Address(from, 0));
      __ movb(Address(from, to_from, Address::times_1, 0), rax);
    __ BIND(L_exit);
    } else {
    __ BIND(L_copy_byte);
    }
  } else {
  __ BIND(L_copy_2_bytes);
  }

  if (t == T_OBJECT) {
    __ movl(count, Address(rsp, 12+12)); // reread 'count'
    __ mov(to, saved_to); // restore 'to'
    gen_write_ref_array_post_barrier(to, count);
  __ BIND(L_0_count);
  }
  inc_copy_counter_np(t);
  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1056 |
||
1057 |
||
6433 | 1058 |
// Generate an array fill stub.
//
//   t       - element type, forwarded to MacroAssembler::generate_fill
//   aligned - forwarded to MacroAssembler::generate_fill
//   name    - stub name passed to StubCodeMark
//
// The stub loads its three stack arguments (destination address, value,
// element count) into rdi, rdx and rsi and delegates the actual fill to the
// macro assembler, with rax and xmm0 passed as additional registers.
// Returns the start address of the generated stub.
address generate_fill(BasicType t, bool aligned, const char *name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address stub_start = __ pc();

  BLOCK_COMMENT("Entry:");

  const Register dest       = rdi;  // destination array address
  const Register fill_value = rdx;  // value to store
  const Register elem_count = rsi;  // elements count

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // rsi and rdi are preserved for the caller.
  __ push(rsi);
  __ push(rdi);
  // Arguments sit above the three pushed words and the return address.
  __ movptr(dest,     Address(rsp, 12+ 4));
  __ movl(fill_value, Address(rsp, 12+ 8));
  __ movl(elem_count, Address(rsp, 12+ 12));

  __ generate_fill(t, aligned, dest, fill_value, elem_count, rax, xmm0);

  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);
  return stub_start;
}
|
1084 |
||
1 | 1085 |
// Generate an array copy stub for possibly overlapping arrays.
//
//   t                 - element type; T_BYTE, T_SHORT and T_OBJECT get
//                       type-specific handling below
//   aligned           - not consulted by this generator
//   sf                - scale factor of one element
//   nooverlap_target  - code address jumped to when the overlap test shows a
//                       forward copy is safe
//   entry             - if not NULL, receives the pc just after the arguments
//                       have been loaded from the stack
//   name              - stub name passed to StubCodeMark
//   dest_uninitialized- forwarded to the pre-barrier for T_OBJECT copies
//
// When the arrays do overlap, the emitted code copies from high to low
// addresses. Returns the start address of the generated stub. The stub
// itself returns 0 in rax.
address generate_conjoint_copy(BasicType t, bool aligned,
                               Address::ScaleFactor sf,
                               address nooverlap_target,
                               address* entry, const char *name,
                               bool dest_uninitialized = false) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
  Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop;

  const int  shift            = Address::times_ptr - sf;
  const int  elems_in_4_bytes = 1 << shift;
  const int  elems_in_8_bytes = 2 << shift;
  const bool is_byte          = (t == T_BYTE);
  const bool is_byte_or_short = is_byte || (t == T_SHORT);
  // NOTE: 1 << (shift - 1) is only formed inside byte/short-only code,
  // exactly where the expression was evaluated before.

  const Register src   = rax;  // source array address
  const Register dst   = rdx;  // destination array address
  const Register from  = rsi;  // source array address
  const Register to    = rdi;  // destination array address
  const Register count = rcx;  // elements count
  const Register end   = rax;  // array end address; same register as 'src'

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);
  // Arguments sit above the three pushed words and the return address.
  __ movptr(src  , Address(rsp, 12+ 4));   // from
  __ movptr(dst  , Address(rsp, 12+ 8));   // to
  __ movl2ptr(count, Address(rsp, 12+12)); // count

  if (entry != NULL) {
    *entry = __ pc(); // Entry point from generic arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  // nooverlap_target expects arguments in rsi and rdi.
  __ mov(from, src);
  __ mov(to  , dst);

  // arrays overlap test: dispatch to disjoint stub if necessary.
  // Taken when dst <= src, or when dst >= src + count * elem_size (unsigned).
  RuntimeAddress nooverlap(nooverlap_target);
  __ cmpptr(dst, src);
  __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
  __ jump_cc(Assembler::belowEqual, nooverlap);
  __ cmpptr(dst, end);
  __ jump_cc(Assembler::aboveEqual, nooverlap);

  if (t == T_OBJECT) {
    // Nothing to do (and no barriers) for an empty copy.
    __ testl(count, count);
    __ jcc(Assembler::zero, L_0_count);
    gen_write_ref_array_pre_barrier(dst, count, dest_uninitialized);
  }

  // copy from high to low
  __ cmpl(count, elems_in_8_bytes); // Short arrays (< 8 bytes) copy by element
  __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
  if (is_byte_or_short) {
    // Align the end of destination array at 4 bytes address boundary
    const int elems_in_2_bytes = 1 << (shift - 1);
    __ lea(end, Address(dst, count, sf, 0));
    if (is_byte) {
      // One byte misalignment happens only for byte arrays
      __ testl(end, 1);
      __ jccb(Assembler::zero, L_skip_align1);
      __ decrement(count);
      __ movb(rdx, Address(from, count, sf, 0));
      __ movb(Address(to, count, sf, 0), rdx);
    __ BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    __ testl(end, 2);
    __ jccb(Assembler::zero, L_skip_align2);
    __ subptr(count, elems_in_2_bytes);
    __ movw(rdx, Address(from, count, sf, 0));
    __ movw(Address(to, count, sf, 0), rdx);
  __ BIND(L_skip_align2);
    // The alignment steps may have shortened the array; test again.
    __ cmpl(count, elems_in_8_bytes); // Short arrays (< 8 bytes) copy by element
    __ jcc(Assembler::below, L_copy_4_bytes);
  }

  if (VM_Version::supports_mmx()) {
    // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
    // 'end' holds whatever the most recent lea above left in it.
    __ testptr(end, 4);
    __ jccb(Assembler::zero, L_copy_8_bytes);
    __ subl(count, elems_in_4_bytes);
    __ movl(rdx, Address(from, count, sf, 0));
    __ movl(Address(to, count, sf, 0), rdx);
    __ jmpb(L_copy_8_bytes);

    __ align(OptoLoopAlignment);
    // Move 8 bytes
  __ BIND(L_copy_8_bytes_loop);
    if (UseXMMForArrayCopy) {
      __ movq(xmm0, Address(from, count, sf, 0));
      __ movq(Address(to, count, sf, 0), xmm0);
    } else {
      __ movq(mmx0, Address(from, count, sf, 0));
      __ movq(Address(to, count, sf, 0), mmx0);
    }
  __ BIND(L_copy_8_bytes);
    __ subl(count, elems_in_8_bytes);
    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
    __ addl(count, elems_in_8_bytes);   // undo the last subtraction
    if (!UseXMMForArrayCopy) {
      __ emms();
    }
  } else {
    // No MMX: backward rep_mov with the direction flag set.
    __ std();
    __ mov(rax, count); // Save 'count'
    __ mov(rdx, to);    // Save 'to'
    __ lea(rsi, Address(from, count, sf, -4));
    __ lea(rdi, Address(to  , count, sf, -4));
    __ shrptr(count, shift); // element count --> 4-byte chunk count
    __ rep_mov();
    __ cld();
    __ mov(count, rax); // restore 'count'
    __ andl(count, (1<<shift)-1);      // mask the number of rest elements
    __ movptr(from, Address(rsp, 12+4)); // reread 'from'
    __ mov(to, rdx);   // restore 'to'
    __ jmpb(L_copy_2_bytes); // all dword were copied
  }

__ BIND(L_copy_4_bytes);
  // copy prefix dword
  __ testl(count, elems_in_4_bytes);
  __ jccb(Assembler::zero, L_copy_2_bytes);
  __ movl(rdx, Address(from, count, sf, -4));
  __ movl(Address(to, count, sf, -4), rdx);

  if (is_byte_or_short) {
    __ subl(count, (1<<shift));
  __ BIND(L_copy_2_bytes);
    // copy prefix word
    __ testl(count, 1<<(shift-1));
    __ jccb(Assembler::zero, L_copy_byte);
    __ movw(rdx, Address(from, count, sf, -2));
    __ movw(Address(to, count, sf, -2), rdx);
    if (is_byte) {
      __ subl(count, 1<<(shift-1));
    __ BIND(L_copy_byte);
      // copy prefix byte
      __ testl(count, 1);
      __ jccb(Assembler::zero, L_exit);
      __ movb(rdx, Address(from, 0));
      __ movb(Address(to, 0), rdx);
    __ BIND(L_exit);
    } else {
    __ BIND(L_copy_byte);
    }
  } else {
  __ BIND(L_copy_2_bytes);
  }

  if (t == T_OBJECT) {
    __ movl2ptr(count, Address(rsp, 12+12)); // reread count
    gen_write_ref_array_post_barrier(to, count);
  __ BIND(L_0_count);
  }
  inc_copy_counter_np(t);
  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1246 |
||
1247 |
||
1248 |
// Generate a forward copy stub for arrays of 8-byte elements.
//
//   entry - if not NULL, receives the pc just after the arguments have been
//           loaded from the stack (secondary entry point)
//   name  - stub name passed to StubCodeMark
//
// The copy goes through xmm_copy_forward / mmx_copy_forward when MMX is
// supported, otherwise through an fild_d / fistp_d loop.
// Returns the start address of the generated stub. The stub itself returns
// 0 in rax.
address generate_disjoint_long_copy(address* entry, const char *name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_copy_8_bytes, L_copy_8_bytes_loop;
  const Register from       = rax;  // source array address
  const Register to         = rdx;  // destination array address
  const Register count      = rcx;  // elements count
  const Register to_from    = rdx;  // (to - from); same register as 'to'

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // Arguments sit above the saved frame pointer and the return address.
  __ movptr(from , Address(rsp, 8+0));       // from
  __ movptr(to   , Address(rsp, 8+4));       // to
  __ movl2ptr(count, Address(rsp, 8+8));     // count

  // Guard the out-parameter the same way generate_disjoint_copy does, so a
  // caller that does not need the secondary entry point may pass NULL.
  if (entry != NULL) {
    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  __ subptr(to, from); // to --> to_from
  if (VM_Version::supports_mmx()) {
    if (UseXMMForArrayCopy) {
      xmm_copy_forward(from, to_from, count);
    } else {
      mmx_copy_forward(from, to_from, count);
    }
  } else {
    // No MMX: move each 8-byte element with fild_d / fistp_d.
    // The loop is entered at its count check.
    __ jmpb(L_copy_8_bytes);
    __ align(OptoLoopAlignment);
  __ BIND(L_copy_8_bytes_loop);
    __ fild_d(Address(from, 0));
    __ fistp_d(Address(from, to_from, Address::times_1));
    __ addptr(from, 8);
  __ BIND(L_copy_8_bytes);
    __ decrement(count);
    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
  }
  inc_copy_counter_np(T_LONG);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1291 |
||
1292 |
// Generate a copy stub for possibly overlapping arrays of 8-byte elements.
//
//   nooverlap_target - code address jumped to when the overlap test shows a
//                      forward copy is safe
//   entry            - if not NULL, receives the pc just after the arguments
//                      have been loaded from the stack (secondary entry point)
//   name             - stub name passed to StubCodeMark
//
// When the arrays do overlap, elements are copied from the highest index
// down to index 0. Returns the start address of the generated stub. The
// stub itself returns 0 in rax.
address generate_conjoint_long_copy(address nooverlap_target,
                                    address* entry, const char *name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_copy_8_bytes, L_copy_8_bytes_loop;
  const Register from       = rax;  // source array address
  const Register to         = rdx;  // destination array address
  const Register count      = rcx;  // elements count
  const Register end_from   = rax;  // source array end address; same register as 'from'

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // Arguments sit above the saved frame pointer and the return address.
  __ movptr(from , Address(rsp, 8+0));       // from
  __ movptr(to   , Address(rsp, 8+4));       // to
  __ movl2ptr(count, Address(rsp, 8+8));     // count

  // Guard the out-parameter the same way generate_conjoint_copy does, so a
  // caller that does not need the secondary entry point may pass NULL.
  if (entry != NULL) {
    *entry = __ pc(); // Entry point from generic arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  // arrays overlap test
  // Taken when to <= from, or when to >= from + count * 8 (unsigned).
  __ cmpptr(to, from);
  RuntimeAddress nooverlap(nooverlap_target);
  __ jump_cc(Assembler::belowEqual, nooverlap);
  __ lea(end_from, Address(from, count, Address::times_8, 0));
  __ cmpptr(to, end_from);
  // The lea overwrote 'from' (shared register), so reload it from the stack
  // before the conditional jump consumes the flags set by cmpptr.
  __ movptr(from, Address(rsp, 8));  // from
  __ jump_cc(Assembler::aboveEqual, nooverlap);

  // Enter the loop at its count check.
  __ jmpb(L_copy_8_bytes);

  __ align(OptoLoopAlignment);
__ BIND(L_copy_8_bytes_loop);
  if (VM_Version::supports_mmx()) {
    if (UseXMMForArrayCopy) {
      __ movq(xmm0, Address(from, count, Address::times_8));
      __ movq(Address(to, count, Address::times_8), xmm0);
    } else {
      __ movq(mmx0, Address(from, count, Address::times_8));
      __ movq(Address(to, count, Address::times_8), mmx0);
    }
  } else {
    __ fild_d(Address(from, count, Address::times_8));
    __ fistp_d(Address(to, count, Address::times_8));
  }
__ BIND(L_copy_8_bytes);
  __ decrement(count);
  __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);

  if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
    // MMX registers were used by the loop above.
    __ emms();
  }
  inc_copy_counter_np(T_LONG);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1350 |
||
1351 |
||
1352 |
// Helper for generating a dynamic type check. |
|
1353 |
// The sub_klass must be one of {rbx, rdx, rsi}. |
|
1354 |
// The temp is killed. |
|
1355 |
void generate_type_check(Register sub_klass, |
|
1356 |
Address& super_check_offset_addr, |
|
1357 |
Address& super_klass_addr, |
|
1358 |
Register temp, |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1359 |
Label* L_success, Label* L_failure) { |
1 | 1360 |
BLOCK_COMMENT("type_check:"); |
1361 |
||
1362 |
Label L_fallthrough; |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1363 |
#define LOCAL_JCC(assembler_con, label_ptr) \ |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1364 |
if (label_ptr != NULL) __ jcc(assembler_con, *(label_ptr)); \ |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1365 |
else __ jcc(assembler_con, L_fallthrough) /*omit semi*/ |
1 | 1366 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1367 |
// The following is a strange variation of the fast path which requires |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1368 |
// one less register, because needed values are on the argument stack. |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1369 |
// __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp, |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1370 |
// L_success, L_failure, NULL); |
1 | 1371 |
assert_different_registers(sub_klass, temp); |
1372 |
||
11430
718fc06da49a
7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
stefank
parents:
10565
diff
changeset
|
1373 |
int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); |
1 | 1374 |
|
1375 |
// if the pointers are equal, we are done (e.g., String[] elements) |
|
1066 | 1376 |
__ cmpptr(sub_klass, super_klass_addr); |
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1377 |
LOCAL_JCC(Assembler::equal, L_success); |
1 | 1378 |
|
1379 |
// check the supertype display: |
|
1066 | 1380 |
__ movl2ptr(temp, super_check_offset_addr); |
1 | 1381 |
Address super_check_addr(sub_klass, temp, Address::times_1, 0); |
1066 | 1382 |
__ movptr(temp, super_check_addr); // load displayed supertype |
1383 |
__ cmpptr(temp, super_klass_addr); // test the super type |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1384 |
LOCAL_JCC(Assembler::equal, L_success); |
1 | 1385 |
|
1386 |
// if it was a primary super, we can just fail immediately |
|
1387 |
__ cmpl(super_check_offset_addr, sc_offset); |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1388 |
LOCAL_JCC(Assembler::notEqual, L_failure); |
1 | 1389 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1390 |
// The repne_scan instruction uses fixed registers, which will get spilled. |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1391 |
// We happen to know this works best when super_klass is in rax. |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1392 |
Register super_klass = temp; |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1393 |
__ movptr(super_klass, super_klass_addr); |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1394 |
__ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1395 |
L_success, L_failure); |
1 | 1396 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1397 |
__ bind(L_fallthrough); |
1 | 1398 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1399 |
if (L_success == NULL) { BLOCK_COMMENT("L_success:"); } |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1400 |
if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); } |
1 | 1401 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1402 |
#undef LOCAL_JCC |
1 | 1403 |
} |
1404 |
||
1405 |
// |
|
1406 |
// Generate checkcasting array copy stub |
|
1407 |
// |
|
1408 |
// Input: |
|
1409 |
// 4(rsp) - source array address |
|
1410 |
// 8(rsp) - destination array address |
|
1411 |
// 12(rsp) - element count, can be zero |
|
1412 |
// 16(rsp) - size_t ckoff (super_check_offset) |
|
1413 |
// 20(rsp) - oop ckval (super_klass) |
|
1414 |
// |
|
1415 |
// Output: |
|
1416 |
// rax, == 0 - success |
|
1417 |
// rax, == -1^K - failure, where K is partial transfer count |
|
1418 |
// |
|
8498 | 1419 |
address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) {
  // Emits the stub that copies oops one at a time, type-checking every
  // non-null element against the super klass passed on the stack.
  // The stub leaves 0 in rax on success and -1^K on failure, where K is
  // the number of elements copied before the failing one.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_load_element, L_store_element, L_do_card_marks, L_done;
  Label L_post_barrier;

  // Register assignment:
  //   rax, rdx, rcx -- loop control (end_from, end_to, count)
  //   rdi, rsi      -- element access (oop, klass)
  //   rbx           -- the only remaining temp
  const Register from       = rax;  // source array address
  const Register to         = rdx;  // destination array address
  const Register length     = rcx;  // elements count
  const Register elem       = rdi;  // each oop copied
  const Register elem_klass = rsi;  // each elem._klass (sub_klass)
  const Register temp       = rbx;  // lone remaining temp

  // Once the loop is set up the first three registers are re-used under
  // these names.
  const Register end_from = from;
  const Register end_to   = to;
  const Register count    = length;

  // Incoming stack arguments, addressed relative to rsp as it stands after
  // the enter() and the three pushes emitted below.
  Address from_arg(rsp, 16+ 4);     // from
  Address to_arg(rsp, 16+ 8);       // to
  Address length_arg(rsp, 16+12);   // elements count
  Address ckoff_arg(rsp, 16+16);    // super_check_offset
  Address ckval_arg(rsp, 16+20);    // super_klass

  // Loop-invariant addresses: exclusive end pointers of both arrays.
  Address end_from_addr(from, length, Address::times_ptr, 0);
  Address end_to_addr(to, length, Address::times_ptr, 0);

  // Loop-variant addresses: they assume a negative count that is
  // incremented toward zero.
  Address from_element_addr(end_from, count, Address::times_ptr, 0);
  Address to_element_addr(end_to, count, Address::times_ptr, 0);
  Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());

  __ enter(); // required for proper stackwalking of RuntimeStub frame

  __ push(rsi);
  __ push(rdi);
  __ push(rbx);

  // Fetch the three copy arguments.
  __ movptr(from, from_arg);
  __ movptr(to, to_arg);
  __ movl2ptr(length, length_arg);

  if (entry != NULL) {
    *entry = __ pc(); // Entry point from generic arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  //---------------------------------------------------------------
  // This stub serves arraycopy calls where both arrays are subtypes of
  // Object[] but the destination array type is neither equal to nor a
  // supertype of the source type, so each element is checked separately.

  // Copy from low to high addresses, indexed from the end of each array.
  gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
  __ lea(end_from, end_from_addr);
  __ lea(end_to, end_to_addr);
  assert(length == count, "");  // the negation below relies on this alias
  __ negptr(count);             // negate and test the length
  __ jccb(Assembler::notZero, L_load_element);

  // Empty array: nothing to copy.
  __ xorptr(rax, rax);          // return 0 on (trivial) success
  __ jmp(L_done);

  // ======== begin loop ========
  // The loop is rotated; it is entered at L_load_element.
  // Loop control:
  //   for (count = -count; count != 0; count++)
  // The base registers hold the end pointers computed above, so indexing
  // them with the negative count (scaled by Address::times_ptr) walks the
  // arrays from the first element to the last.
  __ align(OptoLoopAlignment);

  __ BIND(L_store_element);
  __ movptr(to_element_addr, elem);     // store the oop
  __ increment(count);                  // increment the count toward zero
  __ jccb(Assembler::zero, L_do_card_marks);

  // ======== loop entry is here ========
  __ BIND(L_load_element);
  __ movptr(elem, from_element_addr);   // load the oop
  __ testptr(elem, elem);
  __ jccb(Assembler::zero, L_store_element);  // null needs no type check

  // (A possible trick: remember the last non-null element stored
  // successfully and do a quick oop equality check against it.)

  __ movptr(elem_klass, elem_klass_addr);     // query the object klass
  generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp,
                      &L_store_element, NULL);
  // (Falling through here means the element failed the type check.)
  // ======== end loop ========

  // A real type error: the caller has to finish the job.
  // Here count = -1 * number of *remaining* oops and length_arg = *total*
  // oops, so length_arg + count is the number of oops already copied.
  // Emit GC store barriers for those and report their number to the caller.
  assert_different_registers(to, count, rax);
  __ addl(count, length_arg);     // transfers = (length - remaining)
  __ movl2ptr(rax, count);        // save the value
  __ notptr(rax);                 // report (-1^K) to caller (does not affect flags)
  __ jccb(Assembler::notZero, L_post_barrier);
  __ jmp(L_done);                 // K == 0, nothing was copied, skip post barrier

  // Reached on success only.
  __ BIND(L_do_card_marks);
  __ xorptr(rax, rax);            // return 0 on success
  __ movl2ptr(count, length_arg);

  __ BIND(L_post_barrier);
  __ movptr(to, to_arg);          // reload the destination start address
  gen_write_ref_array_post_barrier(to, count);

  // Common exit point (success or failure).
  __ BIND(L_done);
  __ pop(rbx);
  __ pop(rdi);
  __ pop(rsi);
  inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
1550 |
||
1551 |
// |
|
1552 |
// Generate 'unsafe' array copy stub |
|
1553 |
// Though just as safe as the other stubs, it takes an unscaled |
|
1554 |
// size_t argument instead of an element count. |
|
1555 |
// |
|
1556 |
// Input: |
|
1557 |
// 4(rsp) - source array address |
|
1558 |
// 8(rsp) - destination array address |
|
1559 |
// 12(rsp) - byte count, can be zero |
|
1560 |
// |
|
1561 |
// Output: |
|
1562 |
// rax, == 0 - success |
|
1563 |
// rax, == -1 - need to call System.arraycopy |
|
1564 |
// |
|
1565 |
// Examines the alignment of the operands and dispatches |
|
1566 |
// to a long, int, short, or byte copy loop. |
|
1567 |
// |
|
1568 |
address generate_unsafe_copy(const char *name,
                             address byte_copy_entry,
                             address short_copy_entry,
                             address int_copy_entry,
                             address long_copy_entry) {
  // Emits a dispatcher: it inspects the common alignment of the source
  // address, the destination address and the byte count, and jumps to the
  // widest element copy entry (long, int, short, byte) that alignment allows.
  const Register from       = rax;  // source array address
  const Register to         = rdx;  // destination array address
  const Register count      = rcx;  // byte count, later scaled to an element count
  const Register align_bits = rsi;  // from | to | count

  // Incoming stack arguments, addressed relative to rsp as it stands after
  // the enter() and the two pushes emitted below.
  Address from_arg(rsp, 12+ 4);     // from
  Address to_arg(rsp, 12+ 8);       // to
  Address count_arg(rsp, 12+12);    // byte count

  Label L_long_aligned, L_int_aligned, L_short_aligned;

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);

  // Fetch the arguments.
  __ movptr(from, from_arg);
  __ movptr(to, to_arg);
  __ movl2ptr(count, count_arg);

  // The counter is bumped on entry rather than on exit.
  inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);

  // A low bit set in any of the three values survives the OR, so one test
  // per element size covers both addresses and the byte count.
  __ mov(align_bits, from);
  __ orptr(align_bits, to);
  __ orptr(align_bits, count);

  __ testl(align_bits, BytesPerLong-1);
  __ jccb(Assembler::zero, L_long_aligned);

  __ testl(align_bits, BytesPerInt-1);
  __ jccb(Assembler::zero, L_int_aligned);

  __ testl(align_bits, BytesPerShort-1);
  __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));

  // Short-aligned case: reached by falling through the test above.
  __ BIND(L_short_aligned);
  __ shrptr(count, LogBytesPerShort);   // size => short_count
  __ movl(count_arg, count);            // update 'count' on the stack
  __ jump(RuntimeAddress(short_copy_entry));

  __ BIND(L_int_aligned);
  __ shrptr(count, LogBytesPerInt);     // size => int_count
  __ movl(count_arg, count);            // update 'count' on the stack
  __ jump(RuntimeAddress(int_copy_entry));

  __ BIND(L_long_aligned);
  __ shrptr(count, LogBytesPerLong);    // size => qword_count
  __ movl(count_arg, count);            // update 'count' on the stack
  // Undo the two pushes here, since the jlong_arraycopy stub does not pop them.
  __ pop(rdi);
  __ pop(rsi);
  __ jump(RuntimeAddress(long_copy_entry));

  return start;
}
|
1632 |
||
1633 |
||
1634 |
// Perform range checks on the proposed arraycopy. |
|
1635 |
// Smashes src_pos and dst_pos. (Uses them up for temps.) |
|
1636 |
void arraycopy_range_checks(Register src,
                            Register src_pos,
                            Register dst,
                            Register dst_pos,
                            Address& length,
                            Label& L_failed) {
  // Emits the two upper-bound checks of an arraycopy and branches to
  // L_failed when either range runs past the end of its array.
  BLOCK_COMMENT("arraycopy_range_checks:");

  // Both position registers are used up as temps: each is advanced in
  // place to the end position (pos + length) of its range.
  __ addl(src_pos, length);
  __ addl(dst_pos, length);

  const int array_length_offset = arrayOopDesc::length_offset_in_bytes();

  // if (src_pos + length > arrayOop(src)->length()) FAIL;
  __ cmpl(src_pos, Address(src, array_length_offset));
  __ jcc(Assembler::above, L_failed);

  // if (dst_pos + length > arrayOop(dst)->length()) FAIL;
  __ cmpl(dst_pos, Address(dst, array_length_offset));
  __ jcc(Assembler::above, L_failed);

  BLOCK_COMMENT("arraycopy_range_checks done");
}
|
1658 |
||
1659 |
||
1660 |
// |
|
1661 |
// Generate generic array copy stubs |
|
1662 |
// |
|
1663 |
// Input: |
|
1664 |
// 4(rsp) - src oop |
|
1665 |
// 8(rsp) - src_pos |
|
1666 |
// 12(rsp) - dst oop |
|
1667 |
// 16(rsp) - dst_pos |
|
1668 |
// 20(rsp) - element count |
|
1669 |
// |
|
1670 |
// Output: |
|
1671 |
// rax, == 0 - success |
|
1672 |
// rax, == -1^K - failure, where K is partial transfer count |
|
1673 |
// |
|
1674 |
address generate_generic_copy(const char *name, |
|
1675 |
address entry_jbyte_arraycopy, |
|
1676 |
address entry_jshort_arraycopy, |
|
1677 |
address entry_jint_arraycopy, |
|
1678 |
address entry_oop_arraycopy, |
|
1679 |
address entry_jlong_arraycopy, |
|
1680 |
address entry_checkcast_arraycopy) { |
|
1681 |
Label L_failed, L_failed_0, L_objArray; |
|
1682 |
||
1683 |
{ int modulus = CodeEntryAlignment; |
|
1684 |
int target = modulus - 5; // 5 = sizeof jmp(L_failed) |
|
1685 |
int advance = target - (__ offset() % modulus); |
|
1686 |
if (advance < 0) advance += modulus; |
|
1687 |
if (advance > 0) __ nop(advance); |
|
1688 |
} |
|
1689 |
StubCodeMark mark(this, "StubRoutines", name); |
|
1690 |
||
1691 |
// Short-hop target to L_failed. Makes for denser prologue code. |
|
1692 |
__ BIND(L_failed_0); |
|
1693 |
__ jmp(L_failed); |
|
1694 |
assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed"); |
|
1695 |
||
1696 |
__ align(CodeEntryAlignment); |
|
1697 |
address start = __ pc(); |
|
1698 |
||
1699 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
1066 | 1700 |
__ push(rsi); |
1701 |
__ push(rdi); |
|
1 | 1702 |
|
1703 |
// bump this on entry, not on exit: |
|
1704 |
inc_counter_np(SharedRuntime::_generic_array_copy_ctr); |
|
1705 |
||
1706 |
// Input values |
|
1707 |
Address SRC (rsp, 12+ 4); |
|
1708 |
Address SRC_POS (rsp, 12+ 8); |
|
1709 |
Address DST (rsp, 12+12); |
|
1710 |
Address DST_POS (rsp, 12+16); |
|
1711 |
Address LENGTH (rsp, 12+20); |
|
1712 |
||
1713 |
//----------------------------------------------------------------------- |
|
1714 |
// Assembler stub will be used for this call to arraycopy |
|
1715 |
// if the following conditions are met: |
|
1716 |
// |
|
1717 |
// (1) src and dst must not be null. |
|
1718 |
// (2) src_pos must not be negative. |
|
1719 |
// (3) dst_pos must not be negative. |
|
1720 |
// (4) length must not be negative. |
|
1721 |
// (5) src klass and dst klass should be the same and not NULL. |
|
1722 |
// (6) src and dst should be arrays. |
|
1723 |
// (7) src_pos + length must not exceed length of src. |
|
1724 |
// (8) dst_pos + length must not exceed length of dst. |
|
1725 |
// |
|
1726 |
||
1727 |
const Register src = rax; // source array oop |
|
1728 |
const Register src_pos = rsi; |
|
1729 |
const Register dst = rdx; // destination array oop |
|
1730 |
const Register dst_pos = rdi; |
|
1731 |
const Register length = rcx; // transfer count |
|
1732 |
||
1733 |
// if (src == NULL) return -1; |
|
1066 | 1734 |
__ movptr(src, SRC); // src oop |
1735 |
__ testptr(src, src); |
|
1 | 1736 |
__ jccb(Assembler::zero, L_failed_0); |
1737 |
||
1738 |
// if (src_pos < 0) return -1; |
|
1066 | 1739 |
__ movl2ptr(src_pos, SRC_POS); // src_pos |
1 | 1740 |
__ testl(src_pos, src_pos); |
1741 |
__ jccb(Assembler::negative, L_failed_0); |
|
1742 |
||
1743 |
// if (dst == NULL) return -1; |
|
1066 | 1744 |
__ movptr(dst, DST); // dst oop |
1745 |
__ testptr(dst, dst); |
|
1 | 1746 |
__ jccb(Assembler::zero, L_failed_0); |
1747 |
||
1748 |
// if (dst_pos < 0) return -1; |
|
1066 | 1749 |
__ movl2ptr(dst_pos, DST_POS); // dst_pos |
1 | 1750 |
__ testl(dst_pos, dst_pos); |
1751 |
__ jccb(Assembler::negative, L_failed_0); |
|
1752 |
||
1753 |
// if (length < 0) return -1; |
|
1066 | 1754 |
__ movl2ptr(length, LENGTH); // length |
1 | 1755 |
__ testl(length, length); |
1756 |
__ jccb(Assembler::negative, L_failed_0); |
|
1757 |
||
1758 |
// if (src->klass() == NULL) return -1; |
|
1759 |
Address src_klass_addr(src, oopDesc::klass_offset_in_bytes()); |
|
1760 |
Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes()); |
|
1761 |
const Register rcx_src_klass = rcx; // array klass |
|
1066 | 1762 |
__ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes())); |
1 | 1763 |
|
1764 |
#ifdef ASSERT |
|
1765 |
// assert(src->klass() != NULL); |
|
1766 |
BLOCK_COMMENT("assert klasses not null"); |
|
1767 |
{ Label L1, L2; |
|
1066 | 1768 |
__ testptr(rcx_src_klass, rcx_src_klass); |
1 | 1769 |
__ jccb(Assembler::notZero, L2); // it is broken if klass is NULL |
1770 |
__ bind(L1); |
|
1771 |
__ stop("broken null klass"); |
|
1772 |
__ bind(L2); |
|
1066 | 1773 |
__ cmpptr(dst_klass_addr, (int32_t)NULL_WORD); |
1 | 1774 |
__ jccb(Assembler::equal, L1); // this would be broken also |
1775 |
BLOCK_COMMENT("assert done"); |
|
1776 |
} |
|
1777 |
#endif //ASSERT |
|
1778 |
||
1779 |
// Load layout helper (32-bits) |
|
1780 |
// |
|
1781 |
// |array_tag| | header_size | element_type | |log2_element_size| |
|
1782 |
// 32 30 24 16 8 2 0 |
|
1783 |
// |
|
1784 |
// array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 |
|
1785 |
// |
|
1786 |
||
11430
718fc06da49a
7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
stefank
parents:
10565
diff
changeset
|
1787 |
int lh_offset = in_bytes(Klass::layout_helper_offset()); |
1 | 1788 |
Address src_klass_lh_addr(rcx_src_klass, lh_offset); |
1789 |
||
1790 |
// Handle objArrays completely differently... |
|
1791 |
jint objArray_lh = Klass::array_layout_helper(T_OBJECT); |
|
1792 |
__ cmpl(src_klass_lh_addr, objArray_lh); |
|
1793 |
__ jcc(Assembler::equal, L_objArray); |
|
1794 |
||
1795 |
// if (src->klass() != dst->klass()) return -1; |
|
1066 | 1796 |
__ cmpptr(rcx_src_klass, dst_klass_addr); |
1 | 1797 |
__ jccb(Assembler::notEqual, L_failed_0); |
1798 |
||
1799 |
const Register rcx_lh = rcx; // layout helper |
|
1800 |
assert(rcx_lh == rcx_src_klass, "known alias"); |
|
1801 |
__ movl(rcx_lh, src_klass_lh_addr); |
|
1802 |
||
1803 |
// if (!src->is_Array()) return -1; |
|
1804 |
__ cmpl(rcx_lh, Klass::_lh_neutral_value); |
|
1805 |
__ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp |
|
1806 |
||
1807 |
// At this point, it is known to be a typeArray (array_tag 0x3). |
|
1808 |
#ifdef ASSERT |
|
1809 |
{ Label L; |
|
1810 |
__ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); |
|
1811 |
__ jcc(Assembler::greaterEqual, L); // signed cmp |
|
1812 |
__ stop("must be a primitive array"); |
|
1813 |
__ bind(L); |
|
1814 |
} |
|
1815 |
#endif |
|
1816 |
||
1817 |
assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh); |
|
1818 |
arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); |
|
1819 |
||
13952
e3cf184080bc
8000213: NPG: Should have renamed arrayKlass and typeArrayKlass
coleenp
parents:
13728
diff
changeset
|
1820 |
// TypeArrayKlass |
1 | 1821 |
// |
1822 |
// src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); |
|
1823 |
// dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); |
|
1824 |
// |
|
1825 |
const Register rsi_offset = rsi; // array offset |
|
1826 |
const Register src_array = src; // src array offset |
|
1827 |
const Register dst_array = dst; // dst array offset |
|
1828 |
const Register rdi_elsize = rdi; // log2 element size |
|
1829 |
||
1066 | 1830 |
__ mov(rsi_offset, rcx_lh); |
1831 |
__ shrptr(rsi_offset, Klass::_lh_header_size_shift); |
|
1832 |
__ andptr(rsi_offset, Klass::_lh_header_size_mask); // array_offset |
|
1833 |
__ addptr(src_array, rsi_offset); // src array offset |
|
1834 |
__ addptr(dst_array, rsi_offset); // dst array offset |
|
1835 |
__ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize |
|
1 | 1836 |
|
1837 |
// next registers should be set before the jump to corresponding stub |
|
1838 |
const Register from = src; // source array address |
|
1839 |
const Register to = dst; // destination array address |
|
1840 |
const Register count = rcx; // elements count |
|
1841 |
// some of them should be duplicated on stack |
|
1842 |
#define FROM Address(rsp, 12+ 4) |
|
1843 |
#define TO Address(rsp, 12+ 8) // Not used now |
|
1844 |
#define COUNT Address(rsp, 12+12) // Only for oop arraycopy |
|
1845 |
||
1846 |
BLOCK_COMMENT("scale indexes to element size"); |
|
1066 | 1847 |
__ movl2ptr(rsi, SRC_POS); // src_pos |
1848 |
__ shlptr(rsi); // src_pos << rcx (log2 elsize) |
|
1 | 1849 |
assert(src_array == from, ""); |
1066 | 1850 |
__ addptr(from, rsi); // from = src_array + SRC_POS << log2 elsize |
1851 |
__ movl2ptr(rdi, DST_POS); // dst_pos |
|
1852 |
__ shlptr(rdi); // dst_pos << rcx (log2 elsize) |
|
1 | 1853 |
assert(dst_array == to, ""); |
1066 | 1854 |
__ addptr(to, rdi); // to = dst_array + DST_POS << log2 elsize |
1855 |
__ movptr(FROM, from); // src_addr |
|
1856 |
__ mov(rdi_elsize, rcx_lh); // log2 elsize |
|
1857 |
__ movl2ptr(count, LENGTH); // elements count |
|
1 | 1858 |
|
1859 |
BLOCK_COMMENT("choose copy loop based on element size"); |
|
1860 |
__ cmpl(rdi_elsize, 0); |
|
1861 |
||
1862 |
__ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy)); |
|
1863 |
__ cmpl(rdi_elsize, LogBytesPerShort); |
|
1864 |
__ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy)); |
|
1865 |
__ cmpl(rdi_elsize, LogBytesPerInt); |
|
1866 |
__ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy)); |
|
1867 |
#ifdef ASSERT |
|
1868 |
__ cmpl(rdi_elsize, LogBytesPerLong); |
|
1869 |
__ jccb(Assembler::notEqual, L_failed); |
|
1870 |
#endif |
|
1066 | 1871 |
__ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it. |
1872 |
__ pop(rsi); |
|
1 | 1873 |
__ jump(RuntimeAddress(entry_jlong_arraycopy)); |
1874 |
||
1875 |
__ BIND(L_failed); |
|
1066 | 1876 |
__ xorptr(rax, rax); |
1877 |
__ notptr(rax); // return -1 |
|
1878 |
__ pop(rdi); |
|
1879 |
__ pop(rsi); |
|
1 | 1880 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
1881 |
__ ret(0); |
|
1882 |
||
13952
e3cf184080bc
8000213: NPG: Should have renamed arrayKlass and typeArrayKlass
coleenp
parents:
13728
diff
changeset
|
1883 |
// ObjArrayKlass |
1 | 1884 |
__ BIND(L_objArray); |
1885 |
// live at this point: rcx_src_klass, src[_pos], dst[_pos] |
|
1886 |
||
1887 |
Label L_plain_copy, L_checkcast_copy; |
|
1888 |
// test array classes for subtyping |
|
1066 | 1889 |
__ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality |
1 | 1890 |
__ jccb(Assembler::notEqual, L_checkcast_copy); |
1891 |
||
1892 |
// Identically typed arrays can be copied without element-wise checks. |
|
1893 |
assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass); |
|
1894 |
arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); |
|
1895 |
||
1896 |
__ BIND(L_plain_copy); |
|
1066 | 1897 |
__ movl2ptr(count, LENGTH); // elements count |
1898 |
__ movl2ptr(src_pos, SRC_POS); // reload src_pos |
|
1899 |
__ lea(from, Address(src, src_pos, Address::times_ptr, |
|
1900 |
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr |
|
1901 |
__ movl2ptr(dst_pos, DST_POS); // reload dst_pos |
|
1902 |
__ lea(to, Address(dst, dst_pos, Address::times_ptr, |
|
1903 |
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr |
|
1904 |
__ movptr(FROM, from); // src_addr |
|
1905 |
__ movptr(TO, to); // dst_addr |
|
1 | 1906 |
__ movl(COUNT, count); // count |
1907 |
__ jump(RuntimeAddress(entry_oop_arraycopy)); |
|
1908 |
||
1909 |
__ BIND(L_checkcast_copy); |
|
1910 |
// live at this point: rcx_src_klass, dst[_pos], src[_pos] |
|
1911 |
{ |
|
1912 |
// Handy offsets: |
|
13952
e3cf184080bc
8000213: NPG: Should have renamed arrayKlass and typeArrayKlass
coleenp
parents:
13728
diff
changeset
|
1913 |
int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); |
11430
718fc06da49a
7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
stefank
parents:
10565
diff
changeset
|
1914 |
int sco_offset = in_bytes(Klass::super_check_offset_offset()); |
1 | 1915 |
|
1916 |
Register rsi_dst_klass = rsi; |
|
1917 |
Register rdi_temp = rdi; |
|
1918 |
assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos"); |
|
1919 |
assert(rdi_temp == dst_pos, "expected alias w/ dst_pos"); |
|
1920 |
Address dst_klass_lh_addr(rsi_dst_klass, lh_offset); |
|
1921 |
||
1922 |
// Before looking at dst.length, make sure dst is also an objArray. |
|
1066 | 1923 |
__ movptr(rsi_dst_klass, dst_klass_addr); |
1 | 1924 |
__ cmpl(dst_klass_lh_addr, objArray_lh); |
1925 |
__ jccb(Assembler::notEqual, L_failed); |
|
1926 |
||
1927 |
// It is safe to examine both src.length and dst.length. |
|
1066 | 1928 |
__ movl2ptr(src_pos, SRC_POS); // reload rsi |
1 | 1929 |
arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); |
1930 |
// (Now src_pos and dst_pos are killed, but not src and dst.) |
|
1931 |
||
1932 |
// We'll need this temp (don't forget to pop it after the type check). |
|
1066 | 1933 |
__ push(rbx); |
1 | 1934 |
Register rbx_src_klass = rbx; |
1935 |
||
1066 | 1936 |
__ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx |
1937 |
__ movptr(rsi_dst_klass, dst_klass_addr); |
|
1 | 1938 |
Address super_check_offset_addr(rsi_dst_klass, sco_offset); |
1939 |
Label L_fail_array_check; |
|
1940 |
generate_type_check(rbx_src_klass, |
|
1941 |
super_check_offset_addr, dst_klass_addr, |
|
1942 |
rdi_temp, NULL, &L_fail_array_check); |
|
1943 |
// (On fall-through, we have passed the array type check.) |
|
1066 | 1944 |
__ pop(rbx); |
1 | 1945 |
__ jmp(L_plain_copy); |
1946 |
||
1947 |
__ BIND(L_fail_array_check); |
|
1948 |
// Reshuffle arguments so we can call checkcast_arraycopy: |
|
1949 |
||
1950 |
// match initial saves for checkcast_arraycopy |
|
1066 | 1951 |
// push(rsi); // already done; see above |
1952 |
// push(rdi); // already done; see above |
|
1953 |
// push(rbx); // already done; see above |
|
1 | 1954 |
|
1955 |
// Marshal outgoing arguments now, freeing registers. |
|
1956 |
Address from_arg(rsp, 16+ 4); // from |
|
1957 |
Address to_arg(rsp, 16+ 8); // to |
|
1958 |
Address length_arg(rsp, 16+12); // elements count |
|
1959 |
Address ckoff_arg(rsp, 16+16); // super_check_offset |
|
1960 |
Address ckval_arg(rsp, 16+20); // super_klass |
|
1961 |
||
1962 |
Address SRC_POS_arg(rsp, 16+ 8); |
|
1963 |
Address DST_POS_arg(rsp, 16+16); |
|
1964 |
Address LENGTH_arg(rsp, 16+20); |
|
1965 |
// push rbx, changed the incoming offsets (why not just use rbp,??) |
|
1966 |
// assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, ""); |
|
1967 |
||
1066 | 1968 |
__ movptr(rbx, Address(rsi_dst_klass, ek_offset)); |
1969 |
__ movl2ptr(length, LENGTH_arg); // reload elements count |
|
1970 |
__ movl2ptr(src_pos, SRC_POS_arg); // reload src_pos |
|
1971 |
__ movl2ptr(dst_pos, DST_POS_arg); // reload dst_pos |
|
1 | 1972 |
|
1066 | 1973 |
__ movptr(ckval_arg, rbx); // destination element type |
1 | 1974 |
__ movl(rbx, Address(rbx, sco_offset)); |
1975 |
__ movl(ckoff_arg, rbx); // corresponding class check offset |
|
1976 |
||
1977 |
__ movl(length_arg, length); // outgoing length argument |
|
1978 |
||
1066 | 1979 |
__ lea(from, Address(src, src_pos, Address::times_ptr, |
1 | 1980 |
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); |
1066 | 1981 |
__ movptr(from_arg, from); |
1 | 1982 |
|
1066 | 1983 |
__ lea(to, Address(dst, dst_pos, Address::times_ptr, |
1 | 1984 |
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); |
1066 | 1985 |
__ movptr(to_arg, to); |
1 | 1986 |
__ jump(RuntimeAddress(entry_checkcast_arraycopy)); |
1987 |
} |
|
1988 |
||
1989 |
return start; |
|
1990 |
} |
|
1991 |
||
1992 |
void generate_arraycopy_stubs() { |
|
1993 |
address entry; |
|
1994 |
address entry_jbyte_arraycopy; |
|
1995 |
address entry_jshort_arraycopy; |
|
1996 |
address entry_jint_arraycopy; |
|
1997 |
address entry_oop_arraycopy; |
|
1998 |
address entry_jlong_arraycopy; |
|
1999 |
address entry_checkcast_arraycopy; |
|
2000 |
||
2001 |
StubRoutines::_arrayof_jbyte_disjoint_arraycopy = |
|
2002 |
generate_disjoint_copy(T_BYTE, true, Address::times_1, &entry, |
|
2003 |
"arrayof_jbyte_disjoint_arraycopy"); |
|
2004 |
StubRoutines::_arrayof_jbyte_arraycopy = |
|
2005 |
generate_conjoint_copy(T_BYTE, true, Address::times_1, entry, |
|
2006 |
NULL, "arrayof_jbyte_arraycopy"); |
|
2007 |
StubRoutines::_jbyte_disjoint_arraycopy = |
|
2008 |
generate_disjoint_copy(T_BYTE, false, Address::times_1, &entry, |
|
2009 |
"jbyte_disjoint_arraycopy"); |
|
2010 |
StubRoutines::_jbyte_arraycopy = |
|
2011 |
generate_conjoint_copy(T_BYTE, false, Address::times_1, entry, |
|
2012 |
&entry_jbyte_arraycopy, "jbyte_arraycopy"); |
|
2013 |
||
2014 |
StubRoutines::_arrayof_jshort_disjoint_arraycopy = |
|
2015 |
generate_disjoint_copy(T_SHORT, true, Address::times_2, &entry, |
|
2016 |
"arrayof_jshort_disjoint_arraycopy"); |
|
2017 |
StubRoutines::_arrayof_jshort_arraycopy = |
|
2018 |
generate_conjoint_copy(T_SHORT, true, Address::times_2, entry, |
|
2019 |
NULL, "arrayof_jshort_arraycopy"); |
|
2020 |
StubRoutines::_jshort_disjoint_arraycopy = |
|
2021 |
generate_disjoint_copy(T_SHORT, false, Address::times_2, &entry, |
|
2022 |
"jshort_disjoint_arraycopy"); |
|
2023 |
StubRoutines::_jshort_arraycopy = |
|
2024 |
generate_conjoint_copy(T_SHORT, false, Address::times_2, entry, |
|
2025 |
&entry_jshort_arraycopy, "jshort_arraycopy"); |
|
2026 |
||
2027 |
// Next arrays are always aligned on 4 bytes at least. |
|
2028 |
StubRoutines::_jint_disjoint_arraycopy = |
|
2029 |
generate_disjoint_copy(T_INT, true, Address::times_4, &entry, |
|
2030 |
"jint_disjoint_arraycopy"); |
|
2031 |
StubRoutines::_jint_arraycopy = |
|
2032 |
generate_conjoint_copy(T_INT, true, Address::times_4, entry, |
|
2033 |
&entry_jint_arraycopy, "jint_arraycopy"); |
|
2034 |
||
2035 |
StubRoutines::_oop_disjoint_arraycopy = |
|
1066 | 2036 |
generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry, |
1 | 2037 |
"oop_disjoint_arraycopy"); |
2038 |
StubRoutines::_oop_arraycopy = |
|
1066 | 2039 |
generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry, |
1 | 2040 |
&entry_oop_arraycopy, "oop_arraycopy"); |
2041 |
||
8498 | 2042 |
StubRoutines::_oop_disjoint_arraycopy_uninit = |
2043 |
generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry, |
|
2044 |
"oop_disjoint_arraycopy_uninit", |
|
2045 |
/*dest_uninitialized*/true); |
|
2046 |
StubRoutines::_oop_arraycopy_uninit = |
|
2047 |
generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry, |
|
2048 |
NULL, "oop_arraycopy_uninit", |
|
2049 |
/*dest_uninitialized*/true); |
|
2050 |
||
1 | 2051 |
StubRoutines::_jlong_disjoint_arraycopy = |
2052 |
generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy"); |
|
2053 |
StubRoutines::_jlong_arraycopy = |
|
2054 |
generate_conjoint_long_copy(entry, &entry_jlong_arraycopy, |
|
2055 |
"jlong_arraycopy"); |
|
2056 |
||
6433 | 2057 |
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); |
2058 |
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); |
|
2059 |
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); |
|
2060 |
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); |
|
2061 |
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); |
|
2062 |
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); |
|
2063 |
||
8498 | 2064 |
StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; |
2065 |
StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; |
|
2066 |
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; |
|
2067 |
StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; |
|
1 | 2068 |
|
8498 | 2069 |
StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; |
2070 |
StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; |
|
2071 |
StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; |
|
2072 |
StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; |
|
1 | 2073 |
|
2074 |
StubRoutines::_checkcast_arraycopy = |
|
8498 | 2075 |
generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); |
2076 |
StubRoutines::_checkcast_arraycopy_uninit = |
|
2077 |
generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true); |
|
1 | 2078 |
|
2079 |
StubRoutines::_unsafe_arraycopy = |
|
2080 |
generate_unsafe_copy("unsafe_arraycopy", |
|
2081 |
entry_jbyte_arraycopy, |
|
2082 |
entry_jshort_arraycopy, |
|
2083 |
entry_jint_arraycopy, |
|
2084 |
entry_jlong_arraycopy); |
|
2085 |
||
2086 |
StubRoutines::_generic_arraycopy = |
|
2087 |
generate_generic_copy("generic_arraycopy", |
|
2088 |
entry_jbyte_arraycopy, |
|
2089 |
entry_jshort_arraycopy, |
|
2090 |
entry_jint_arraycopy, |
|
2091 |
entry_oop_arraycopy, |
|
2092 |
entry_jlong_arraycopy, |
|
2093 |
entry_checkcast_arraycopy); |
|
2094 |
} |
|
2095 |
||
4645
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2096 |
void generate_math_stubs() { |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2097 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2098 |
StubCodeMark mark(this, "StubRoutines", "log10"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2099 |
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2100 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2101 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2102 |
__ flog10(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2103 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2104 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2105 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2106 |
StubCodeMark mark(this, "StubRoutines", "sin"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2107 |
StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2108 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2109 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2110 |
__ trigfunc('s'); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2111 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2112 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2113 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2114 |
StubCodeMark mark(this, "StubRoutines", "cos"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2115 |
StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2116 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2117 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2118 |
__ trigfunc('c'); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2119 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2120 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2121 |
{ |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2122 |
StubCodeMark mark(this, "StubRoutines", "tan"); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2123 |
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc(); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2124 |
|
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2125 |
__ fld_d(Address(rsp, 4)); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2126 |
__ trigfunc('t'); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2127 |
__ ret(0); |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2128 |
} |
12739
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2129 |
{ |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2130 |
StubCodeMark mark(this, "StubRoutines", "pow"); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2131 |
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc(); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2132 |
|
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2133 |
__ fld_d(Address(rsp, 12)); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2134 |
__ fld_d(Address(rsp, 4)); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2135 |
__ pow_with_fallback(0); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2136 |
__ ret(0); |
09f26b73ae66
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
11439
diff
changeset
|
2137 |
} |
4645
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2138 |
} |
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
2139 |
|
14132 | 2140 |
// AES intrinsic stubs

// AES block size constant (16; the AES stubs move data in 16-byte movdqu
// transfers, so this is presumably a byte count).
enum {AESBlockSize = 16};
|
2142 |
||
2143 |
// Emits a 16-byte-aligned constant table of four 32-bit words and returns its
// address.
//
// The name and the words 0x00010203 .. 0x0c0d0e0f suggest this is the pshufb
// control mask that reverses the bytes inside each 32-bit lane (the table the
// AES stubs fetch through StubRoutines::x86::key_shuffle_mask_addr()) --
// NOTE(review): confirm where the returned address is stored.
address generate_key_shuffle_mask() {
  __ align(16);
  StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
  address start = __ pc();
  __ emit_data(0x00010203, relocInfo::none, 0 );
  __ emit_data(0x04050607, relocInfo::none, 0 );
  __ emit_data(0x08090a0b, relocInfo::none, 0 );
  __ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
  return start;
}
|
2153 |
||
2154 |
// Utility routine for loading a 128-bit key word in little endian format |
|
2155 |
// can optionally specify that the shuffle mask is already in an xmmregister |
|
2156 |
// Emits code that loads the 16 bytes at [key + offset] into xmmdst (unaligned
// load) and then byte-shuffles xmmdst with the key shuffle mask.
//
//   xmmdst        - destination XMM register for the shuffled key word
//   key           - register holding the base address of the key array data
//   offset        - byte offset of the wanted 128-bit key word
//   xmm_shuf_mask - XMM register that already holds the shuffle mask; when it
//                   is left at its NULL default, the mask is used as a memory
//                   operand at StubRoutines::x86::key_shuffle_mask_addr()
//                   instead.
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
  __ movdqu(xmmdst, Address(key, offset));
  if (xmm_shuf_mask != NULL) {
    __ pshufb(xmmdst, xmm_shuf_mask);
  } else {
    __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  }
}
|
2164 |
||
2165 |
// aesenc using specified key+offset |
|
2166 |
// can optionally specify that the shuffle mask is already in an xmmregister |
|
2167 |
// Emits one AES encryption round on xmmdst using the round key stored at
// [key + offset].
//
// The key word is first brought into xmmtmp by load_key() (which clobbers
// xmmtmp), then aesenc is applied to xmmdst. xmm_shuf_mask is forwarded to
// load_key() unchanged; see there for the meaning of its NULL default.
void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
  load_key(xmmtmp, key, offset, xmm_shuf_mask);
  __ aesenc(xmmdst, xmmtmp);
}
|
2171 |
||
2172 |
// aesdec using specified key+offset |
|
2173 |
// can optionally specify that the shuffle mask is already in an xmmregister |
|
2174 |
// Emits one AES decryption round on xmmdst using the round key stored at
// [key + offset].
//
// The key word is first brought into xmmtmp by load_key() (which clobbers
// xmmtmp), then aesdec is applied to xmmdst. xmm_shuf_mask is forwarded to
// load_key() unchanged; see there for the meaning of its NULL default.
void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
  load_key(xmmtmp, key, offset, xmm_shuf_mask);
  __ aesdec(xmmdst, xmmtmp);
}
|
2178 |
||
2179 |
||
2180 |
// Arguments: |
|
2181 |
// |
|
2182 |
// Inputs: |
|
2183 |
// c_rarg0 - source byte array address |
|
2184 |
// c_rarg1 - destination byte array address |
|
2185 |
// c_rarg2 - K (key) in little endian int array |
|
2186 |
// |
|
2187 |
// Generates the stub that AES-encrypts a single 16-byte block and returns the
// stub's start address.
//
// In this stub the three arguments are NOT taken from registers: after
// enter() they are read from the caller's stack at
//   rbp + 8   source byte array address
//   rbp + 12  destination byte array address
//   rbp + 16  K (key) in little endian int array
// The generated code returns 0 in rax.
//
// The number of rounds is selected from the key array length (44, 52 or 60
// ints): the shorter key sizes jump to L_doLast early, where the two round
// keys loaded last are applied with aesenc / aesenclast.
address generate_aescrypt_encryptBlock() {
  assert(UseAES, "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
  Label L_doLast;
  address start = __ pc();

  // `from` and `to` deliberately share rdx: the input block is loaded into
  // xmm_result before rdx is reloaded with the destination address.
  const Register from        = rdx;      // source array address
  const Register to          = rdx;      // destination array address
  const Register key         = rcx;      // key array address
  const Register keylen      = rax;
  const Address  from_param(rbp, 8+0);
  const Address  to_param  (rbp, 8+4);
  const Address  key_param (rbp, 8+8);

  const XMMRegister xmm_result = xmm0;
  const XMMRegister xmm_key_shuf_mask = xmm1;
  const XMMRegister xmm_temp1  = xmm2;
  const XMMRegister xmm_temp2  = xmm3;
  const XMMRegister xmm_temp3  = xmm4;
  const XMMRegister xmm_temp4  = xmm5;

  __ enter();   // required for proper stackwalking of RuntimeStub frame

  // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
  // context for the registers used, where all instructions below are using 128-bit mode
  // On EVEX without VL and BW, these instructions will all be AVX.
  // (rdx is free to use as scratch here: it is only loaded with `from` below.)
  if (VM_Version::supports_avx512vlbw()) {
    __ movl(rdx, 0xffff);
    __ kmovdl(k1, rdx);
  }

  __ movptr(from, from_param);
  __ movptr(key, key_param);

  // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
  __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
  __ movptr(to, to_param);                  // `from` is dead from here on; rdx now holds `to`

  // For encryption, the java expanded key ordering is just what we need

  // Initial key addition with the key word at offset 0x00.
  load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
  __ pxor(xmm_result, xmm_temp1);

  // Rounds using the key words at 0x10 .. 0x40.
  load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
  load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
  load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
  load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);

  __ aesenc(xmm_result, xmm_temp1);
  __ aesenc(xmm_result, xmm_temp2);
  __ aesenc(xmm_result, xmm_temp3);
  __ aesenc(xmm_result, xmm_temp4);

  // Rounds using the key words at 0x50 .. 0x80.
  load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
  load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
  load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
  load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);

  __ aesenc(xmm_result, xmm_temp1);
  __ aesenc(xmm_result, xmm_temp2);
  __ aesenc(xmm_result, xmm_temp3);
  __ aesenc(xmm_result, xmm_temp4);

  // The next two key words are the final pair when keylen == 44.
  load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
  load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);

  __ cmpl(keylen, 44);
  __ jccb(Assembler::equal, L_doLast);

  __ aesenc(xmm_result, xmm_temp1);
  __ aesenc(xmm_result, xmm_temp2);

  // The next two key words are the final pair when keylen == 52.
  load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
  load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);

  __ cmpl(keylen, 52);
  __ jccb(Assembler::equal, L_doLast);

  __ aesenc(xmm_result, xmm_temp1);
  __ aesenc(xmm_result, xmm_temp2);

  // Remaining case (keylen == 60): final pair at 0xd0 / 0xe0.
  load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
  load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);

  // xmm_temp1 / xmm_temp2 hold the last two round keys on every path here.
  __ BIND(L_doLast);
  __ aesenc(xmm_result, xmm_temp1);
  __ aesenclast(xmm_result, xmm_temp2);
  __ movdqu(Address(to, 0), xmm_result);        // store the result
  __ xorptr(rax, rax); // return 0
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
2285 |
||
2286 |
||
2287 |
// Arguments: |
|
2288 |
// |
|
2289 |
// Inputs: |
|
2290 |
// c_rarg0 - source byte array address |
|
2291 |
// c_rarg1 - destination byte array address |
|
2292 |
// c_rarg2 - K (key) in little endian int array |
|
2293 |
// |
|
2294 |
// Generates the stub that AES-decrypts a single 16-byte block and returns the
// stub's start address.
//
// In this stub the three arguments are NOT taken from registers: after
// enter() they are read from the caller's stack at
//   rbp + 8   source byte array address
//   rbp + 12  destination byte array address
//   rbp + 16  K (key) in little endian int array
// The generated code returns 0 in rax.
//
// The key words are consumed starting at offset 0x10; the word at offset 0x00
// is kept in xmm_temp3 and applied last with aesdeclast. The number of rounds
// is selected from the key array length (44, 52 or 60 ints).
address generate_aescrypt_decryptBlock() {
  assert(UseAES, "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
  Label L_doLast;
  address start = __ pc();

  // `from` and `to` deliberately share rdx: the input block is loaded into
  // xmm_result before rdx is reloaded with the destination address.
  const Register from        = rdx;      // source array address
  const Register to          = rdx;      // destination array address
  const Register key         = rcx;      // key array address
  const Register keylen      = rax;
  const Address  from_param(rbp, 8+0);
  const Address  to_param  (rbp, 8+4);
  const Address  key_param (rbp, 8+8);

  const XMMRegister xmm_result = xmm0;
  const XMMRegister xmm_key_shuf_mask = xmm1;
  const XMMRegister xmm_temp1  = xmm2;
  const XMMRegister xmm_temp2  = xmm3;
  const XMMRegister xmm_temp3  = xmm4;
  const XMMRegister xmm_temp4  = xmm5;

  __ enter(); // required for proper stackwalking of RuntimeStub frame

  // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
  // context for the registers used, where all instructions below are using 128-bit mode
  // On EVEX without VL and BW, these instructions will all be AVX.
  // (rdx is free to use as scratch here: it is only loaded with `from` below.)
  if (VM_Version::supports_avx512vlbw()) {
    __ movl(rdx, 0xffff);
    __ kmovdl(k1, rdx);
  }

  __ movptr(from, from_param);
  __ movptr(key, key_param);

  // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
  __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  __ movdqu(xmm_result, Address(from, 0));
  __ movptr(to, to_param);                  // `from` is dead from here on; rdx now holds `to`

  // for decryption java expanded key ordering is rotated one position from what we want
  // so we start from 0x10 here and hit 0x00 last
  // we don't know if the key is aligned, hence not using load-execute form
  load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
  load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
  load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
  load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);

  // Initial key addition uses the word at 0x10; rounds follow with 0x20 .. 0x40.
  __ pxor  (xmm_result, xmm_temp1);
  __ aesdec(xmm_result, xmm_temp2);
  __ aesdec(xmm_result, xmm_temp3);
  __ aesdec(xmm_result, xmm_temp4);

  // Rounds using the key words at 0x50 .. 0x80.
  load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
  load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
  load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
  load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);

  __ aesdec(xmm_result, xmm_temp1);
  __ aesdec(xmm_result, xmm_temp2);
  __ aesdec(xmm_result, xmm_temp3);
  __ aesdec(xmm_result, xmm_temp4);

  // xmm_temp3 keeps the key word at 0x00 until the final aesdeclast;
  // it must not be overwritten below.
  load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
  load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
  load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);

  __ cmpl(keylen, 44);
  __ jccb(Assembler::equal, L_doLast);

  __ aesdec(xmm_result, xmm_temp1);
  __ aesdec(xmm_result, xmm_temp2);

  // The next two key words are the last aesdec pair when keylen == 52.
  load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
  load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);

  __ cmpl(keylen, 52);
  __ jccb(Assembler::equal, L_doLast);

  __ aesdec(xmm_result, xmm_temp1);
  __ aesdec(xmm_result, xmm_temp2);

  // Remaining case (keylen == 60): last aesdec pair at 0xd0 / 0xe0.
  load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
  load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);

  __ BIND(L_doLast);
  __ aesdec(xmm_result, xmm_temp1);
  __ aesdec(xmm_result, xmm_temp2);

  // for decryption the aesdeclast operation is always on key+0x00
  __ aesdeclast(xmm_result, xmm_temp3);
  __ movdqu(Address(to, 0), xmm_result);  // store the result
  __ xorptr(rax, rax); // return 0
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
2394 |
||
2395 |
void handleSOERegisters(bool saving) { |
|
2396 |
const int saveFrameSizeInBytes = 4 * wordSize; |
|
2397 |
const Address saved_rbx (rbp, -3 * wordSize); |
|
2398 |
const Address saved_rsi (rbp, -2 * wordSize); |
|
2399 |
const Address saved_rdi (rbp, -1 * wordSize); |
|
2400 |
||
2401 |
if (saving) { |
|
2402 |
__ subptr(rsp, saveFrameSizeInBytes); |
|
2403 |
__ movptr(saved_rsi, rsi); |
|
2404 |
__ movptr(saved_rdi, rdi); |
|
2405 |
__ movptr(saved_rbx, rbx); |
|
2406 |
} else { |
|
2407 |
// restoring |
|
2408 |
__ movptr(rsi, saved_rsi); |
|
2409 |
__ movptr(rdi, saved_rdi); |
|
2410 |
__ movptr(rbx, saved_rbx); |
|
2411 |
} |
|
2412 |
} |
|
2413 |
||
2414 |
// Arguments: |
|
2415 |
// |
|
2416 |
// Inputs: |
|
2417 |
// c_rarg0 - source byte array address |
|
2418 |
// c_rarg1 - destination byte array address |
|
2419 |
// c_rarg2 - K (key) in little endian int array |
|
2420 |
// c_rarg3 - r vector byte array address |
|
2421 |
// c_rarg4 - input length |
|
2422 |
// |
|
22505 | 2423 |
// Output: |
2424 |
// rax - input length |
|
2425 |
// |
|
14132 | 2426 |
address generate_cipherBlockChaining_encryptAESCrypt() { |
14834 | 2427 |
assert(UseAES, "need AES instructions and misaligned SSE support"); |
14132 | 2428 |
__ align(CodeEntryAlignment); |
2429 |
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); |
|
2430 |
address start = __ pc(); |
|
2431 |
||
2432 |
Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; |
|
2433 |
const Register from = rsi; // source array address |
|
2434 |
const Register to = rdx; // destination array address |
|
2435 |
const Register key = rcx; // key array address |
|
2436 |
const Register rvec = rdi; // r byte array initialized from initvector array address |
|
2437 |
// and left with the results of the last encryption block |
|
2438 |
const Register len_reg = rbx; // src len (must be multiple of blocksize 16) |
|
2439 |
const Register pos = rax; |
|
2440 |
||
2441 |
// xmm register assignments for the loops below |
|
2442 |
const XMMRegister xmm_result = xmm0; |
|
2443 |
const XMMRegister xmm_temp = xmm1; |
|
2444 |
// first 6 keys preloaded into xmm2-xmm7 |
|
2445 |
const int XMM_REG_NUM_KEY_FIRST = 2; |
|
2446 |
const int XMM_REG_NUM_KEY_LAST = 7; |
|
2447 |
const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); |
|
2448 |
||
2449 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
2450 |
handleSOERegisters(true /*saving*/); |
|
2451 |
||
32727
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2452 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2453 |
// context for the registers used, where all instructions below are using 128-bit mode |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2454 |
// On EVEX without VL and BW, these instructions will all be AVX. |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2455 |
if (VM_Version::supports_avx512vlbw()) { |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2456 |
__ movl(rdx, 0xffff); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2457 |
__ kmovdl(k1, rdx); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2458 |
} |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2459 |
|
14132 | 2460 |
// load registers from incoming parameters |
2461 |
const Address from_param(rbp, 8+0); |
|
2462 |
const Address to_param (rbp, 8+4); |
|
2463 |
const Address key_param (rbp, 8+8); |
|
2464 |
const Address rvec_param (rbp, 8+12); |
|
2465 |
const Address len_param (rbp, 8+16); |
|
2466 |
__ movptr(from , from_param); |
|
2467 |
__ movptr(to , to_param); |
|
2468 |
__ movptr(key , key_param); |
|
2469 |
__ movptr(rvec , rvec_param); |
|
2470 |
__ movptr(len_reg , len_param); |
|
2471 |
||
2472 |
const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front |
|
2473 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2474 |
// load up xmm regs 2 thru 7 with keys 0-5 |
|
2475 |
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2476 |
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); |
|
2477 |
offset += 0x10; |
|
2478 |
} |
|
2479 |
||
2480 |
__ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec |
|
2481 |
||
2482 |
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) |
|
2483 |
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
|
2484 |
__ cmpl(rax, 44); |
|
2485 |
__ jcc(Assembler::notEqual, L_key_192_256); |
|
2486 |
||
2487 |
// 128 bit code follows here |
|
14834 | 2488 |
__ movl(pos, 0); |
14132 | 2489 |
__ align(OptoLoopAlignment); |
2490 |
__ BIND(L_loopTop_128); |
|
2491 |
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
|
2492 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2493 |
||
2494 |
__ pxor (xmm_result, xmm_key0); // do the aes rounds |
|
2495 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2496 |
__ aesenc(xmm_result, as_XMMRegister(rnum)); |
|
2497 |
} |
|
2498 |
for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) { |
|
2499 |
aes_enc_key(xmm_result, xmm_temp, key, key_offset); |
|
2500 |
} |
|
2501 |
load_key(xmm_temp, key, 0xa0); |
|
2502 |
__ aesenclast(xmm_result, xmm_temp); |
|
2503 |
||
2504 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2505 |
// no need to store r to memory until we exit |
|
2506 |
__ addptr(pos, AESBlockSize); |
|
2507 |
__ subptr(len_reg, AESBlockSize); |
|
2508 |
__ jcc(Assembler::notEqual, L_loopTop_128); |
|
2509 |
||
2510 |
__ BIND(L_exit); |
|
2511 |
__ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object |
|
2512 |
||
2513 |
handleSOERegisters(false /*restoring*/); |
|
22505 | 2514 |
__ movptr(rax, len_param); // return length |
14132 | 2515 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
2516 |
__ ret(0); |
|
2517 |
||
14834 | 2518 |
__ BIND(L_key_192_256); |
2519 |
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) |
|
14132 | 2520 |
__ cmpl(rax, 52); |
2521 |
__ jcc(Assembler::notEqual, L_key_256); |
|
2522 |
||
2523 |
// 192-bit code follows here (could be changed to use more xmm registers) |
|
14834 | 2524 |
__ movl(pos, 0); |
2525 |
__ align(OptoLoopAlignment); |
|
2526 |
__ BIND(L_loopTop_192); |
|
14132 | 2527 |
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2528 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2529 |
||
2530 |
__ pxor (xmm_result, xmm_key0); // do the aes rounds |
|
2531 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2532 |
__ aesenc(xmm_result, as_XMMRegister(rnum)); |
|
2533 |
} |
|
2534 |
for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) { |
|
2535 |
aes_enc_key(xmm_result, xmm_temp, key, key_offset); |
|
2536 |
} |
|
2537 |
load_key(xmm_temp, key, 0xc0); |
|
2538 |
__ aesenclast(xmm_result, xmm_temp); |
|
2539 |
||
2540 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2541 |
// no need to store r to memory until we exit |
|
2542 |
__ addptr(pos, AESBlockSize); |
|
2543 |
__ subptr(len_reg, AESBlockSize); |
|
2544 |
__ jcc(Assembler::notEqual, L_loopTop_192); |
|
2545 |
__ jmp(L_exit); |
|
2546 |
||
14834 | 2547 |
__ BIND(L_key_256); |
14132 | 2548 |
// 256-bit code follows here (could be changed to use more xmm registers) |
14834 | 2549 |
__ movl(pos, 0); |
2550 |
__ align(OptoLoopAlignment); |
|
2551 |
__ BIND(L_loopTop_256); |
|
14132 | 2552 |
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2553 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2554 |
||
2555 |
__ pxor (xmm_result, xmm_key0); // do the aes rounds |
|
2556 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2557 |
__ aesenc(xmm_result, as_XMMRegister(rnum)); |
|
2558 |
} |
|
2559 |
for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) { |
|
2560 |
aes_enc_key(xmm_result, xmm_temp, key, key_offset); |
|
2561 |
} |
|
2562 |
load_key(xmm_temp, key, 0xe0); |
|
2563 |
__ aesenclast(xmm_result, xmm_temp); |
|
2564 |
||
2565 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2566 |
// no need to store r to memory until we exit |
|
2567 |
__ addptr(pos, AESBlockSize); |
|
2568 |
__ subptr(len_reg, AESBlockSize); |
|
2569 |
__ jcc(Assembler::notEqual, L_loopTop_256); |
|
2570 |
__ jmp(L_exit); |
|
2571 |
||
2572 |
return start; |
|
2573 |
} |
|
2574 |
||
2575 |
||
2576 |
// CBC AES Decryption. |
|
2577 |
// In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time. |
|
2578 |
// |
|
2579 |
// Arguments: |
|
2580 |
// |
|
2581 |
// Inputs: |
|
2582 |
// c_rarg0 - source byte array address |
|
2583 |
// c_rarg1 - destination byte array address |
|
2584 |
// c_rarg2 - K (key) in little endian int array |
|
2585 |
// c_rarg3 - r vector byte array address |
|
2586 |
// c_rarg4 - input length |
|
2587 |
// |
|
22505 | 2588 |
// Output: |
2589 |
// rax - input length |
|
2590 |
// |
|
14132 | 2591 |
|
2592 |
address generate_cipherBlockChaining_decryptAESCrypt() { |
|
14834 | 2593 |
assert(UseAES, "need AES instructions and misaligned SSE support"); |
14132 | 2594 |
__ align(CodeEntryAlignment); |
2595 |
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); |
|
2596 |
address start = __ pc(); |
|
2597 |
||
2598 |
Label L_exit, L_key_192_256, L_key_256; |
|
2599 |
Label L_singleBlock_loopTop_128; |
|
2600 |
Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256; |
|
2601 |
const Register from = rsi; // source array address |
|
2602 |
const Register to = rdx; // destination array address |
|
2603 |
const Register key = rcx; // key array address |
|
2604 |
const Register rvec = rdi; // r byte array initialized from initvector array address |
|
2605 |
// and left with the results of the last encryption block |
|
2606 |
const Register len_reg = rbx; // src len (must be multiple of blocksize 16) |
|
2607 |
const Register pos = rax; |
|
2608 |
||
2609 |
// xmm register assignments for the loops below |
|
2610 |
const XMMRegister xmm_result = xmm0; |
|
2611 |
const XMMRegister xmm_temp = xmm1; |
|
2612 |
// first 6 keys preloaded into xmm2-xmm7 |
|
2613 |
const int XMM_REG_NUM_KEY_FIRST = 2; |
|
2614 |
const int XMM_REG_NUM_KEY_LAST = 7; |
|
2615 |
const int FIRST_NON_REG_KEY_offset = 0x70; |
|
2616 |
const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); |
|
2617 |
||
2618 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
2619 |
handleSOERegisters(true /*saving*/); |
|
2620 |
||
32727
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2621 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2622 |
// context for the registers used, where all instructions below are using 128-bit mode |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2623 |
// On EVEX without VL and BW, these instructions will all be AVX. |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2624 |
if (VM_Version::supports_avx512vlbw()) { |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2625 |
__ movl(rdx, 0xffff); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2626 |
__ kmovdl(k1, rdx); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2627 |
} |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2628 |
|
14132 | 2629 |
// load registers from incoming parameters |
2630 |
const Address from_param(rbp, 8+0); |
|
2631 |
const Address to_param (rbp, 8+4); |
|
2632 |
const Address key_param (rbp, 8+8); |
|
2633 |
const Address rvec_param (rbp, 8+12); |
|
2634 |
const Address len_param (rbp, 8+16); |
|
2635 |
__ movptr(from , from_param); |
|
2636 |
__ movptr(to , to_param); |
|
2637 |
__ movptr(key , key_param); |
|
2638 |
__ movptr(rvec , rvec_param); |
|
2639 |
__ movptr(len_reg , len_param); |
|
2640 |
||
2641 |
// the java expanded key ordering is rotated one position from what we want |
|
2642 |
// so we start from 0x10 here and hit 0x00 last |
|
2643 |
const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front |
|
2644 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2645 |
// load up xmm regs 2 thru 6 with first 5 keys |
|
2646 |
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2647 |
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); |
|
2648 |
offset += 0x10; |
|
2649 |
} |
|
2650 |
||
2651 |
// inside here, use the rvec register to point to previous block cipher |
|
2652 |
// with which we xor at the end of each newly decrypted block |
|
2653 |
const Register prev_block_cipher_ptr = rvec; |
|
2654 |
||
2655 |
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) |
|
2656 |
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
|
2657 |
__ cmpl(rax, 44); |
|
2658 |
__ jcc(Assembler::notEqual, L_key_192_256); |
|
2659 |
||
2660 |
||
2661 |
// 128-bit code follows here, parallelized |
|
14834 | 2662 |
__ movl(pos, 0); |
2663 |
__ align(OptoLoopAlignment); |
|
2664 |
__ BIND(L_singleBlock_loopTop_128); |
|
14132 | 2665 |
__ cmpptr(len_reg, 0); // any blocks left?? |
2666 |
__ jcc(Assembler::equal, L_exit); |
|
2667 |
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
|
2668 |
__ pxor (xmm_result, xmm_key_first); // do the aes dec rounds |
|
2669 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2670 |
__ aesdec(xmm_result, as_XMMRegister(rnum)); |
|
2671 |
} |
|
2672 |
for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) { // 128-bit runs up to key offset a0 |
|
2673 |
aes_dec_key(xmm_result, xmm_temp, key, key_offset); |
|
2674 |
} |
|
2675 |
load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 |
|
2676 |
__ aesdeclast(xmm_result, xmm_temp); |
|
2677 |
__ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); |
|
2678 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2679 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2680 |
// no need to store r to memory until we exit |
|
2681 |
__ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr |
|
2682 |
__ addptr(pos, AESBlockSize); |
|
2683 |
__ subptr(len_reg, AESBlockSize); |
|
2684 |
__ jmp(L_singleBlock_loopTop_128); |
|
2685 |
||
2686 |
||
2687 |
__ BIND(L_exit); |
|
2688 |
__ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); |
|
2689 |
__ movptr(rvec , rvec_param); // restore this since used in loop |
|
2690 |
__ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object |
|
2691 |
handleSOERegisters(false /*restoring*/); |
|
22505 | 2692 |
__ movptr(rax, len_param); // return length |
14132 | 2693 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
2694 |
__ ret(0); |
|
2695 |
||
2696 |
||
2697 |
__ BIND(L_key_192_256); |
|
2698 |
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) |
|
2699 |
__ cmpl(rax, 52); |
|
2700 |
__ jcc(Assembler::notEqual, L_key_256); |
|
2701 |
||
2702 |
// 192-bit code follows here (could be optimized to use parallelism) |
|
14834 | 2703 |
__ movl(pos, 0); |
14132 | 2704 |
__ align(OptoLoopAlignment); |
2705 |
__ BIND(L_singleBlock_loopTop_192); |
|
2706 |
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
|
2707 |
__ pxor (xmm_result, xmm_key_first); // do the aes dec rounds |
|
2708 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2709 |
__ aesdec(xmm_result, as_XMMRegister(rnum)); |
|
2710 |
} |
|
2711 |
for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) { // 192-bit runs up to key offset c0 |
|
2712 |
aes_dec_key(xmm_result, xmm_temp, key, key_offset); |
|
2713 |
} |
|
2714 |
load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 |
|
2715 |
__ aesdeclast(xmm_result, xmm_temp); |
|
2716 |
__ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); |
|
2717 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2718 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2719 |
// no need to store r to memory until we exit |
|
2720 |
__ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr |
|
2721 |
__ addptr(pos, AESBlockSize); |
|
2722 |
__ subptr(len_reg, AESBlockSize); |
|
2723 |
__ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); |
|
2724 |
__ jmp(L_exit); |
|
2725 |
||
2726 |
__ BIND(L_key_256); |
|
2727 |
// 256-bit code follows here (could be optimized to use parallelism) |
|
14834 | 2728 |
__ movl(pos, 0); |
14132 | 2729 |
__ align(OptoLoopAlignment); |
2730 |
__ BIND(L_singleBlock_loopTop_256); |
|
2731 |
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
|
2732 |
__ pxor (xmm_result, xmm_key_first); // do the aes dec rounds |
|
2733 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2734 |
__ aesdec(xmm_result, as_XMMRegister(rnum)); |
|
2735 |
} |
|
2736 |
for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0 |
|
2737 |
aes_dec_key(xmm_result, xmm_temp, key, key_offset); |
|
2738 |
} |
|
2739 |
load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 |
|
2740 |
__ aesdeclast(xmm_result, xmm_temp); |
|
2741 |
__ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); |
|
2742 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2743 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2744 |
// no need to store r to memory until we exit |
|
2745 |
__ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr |
|
2746 |
__ addptr(pos, AESBlockSize); |
|
2747 |
__ subptr(len_reg, AESBlockSize); |
|
2748 |
__ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); |
|
2749 |
__ jmp(L_exit); |
|
2750 |
||
2751 |
return start; |
|
2752 |
} |
|
2753 |
||
31404
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2754 |
// byte swap x86 long |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2755 |
address generate_ghash_long_swap_mask() { |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2756 |
__ align(CodeEntryAlignment); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2757 |
StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2758 |
address start = __ pc(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2759 |
__ emit_data(0x0b0a0908, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2760 |
__ emit_data(0x0f0e0d0c, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2761 |
__ emit_data(0x03020100, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2762 |
__ emit_data(0x07060504, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2763 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2764 |
return start; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2765 |
} |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2766 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2767 |
// byte swap x86 byte array |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2768 |
address generate_ghash_byte_swap_mask() { |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2769 |
__ align(CodeEntryAlignment); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2770 |
StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2771 |
address start = __ pc(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2772 |
__ emit_data(0x0c0d0e0f, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2773 |
__ emit_data(0x08090a0b, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2774 |
__ emit_data(0x04050607, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2775 |
__ emit_data(0x00010203, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2776 |
return start; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2777 |
} |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2778 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2779 |
/* Single and multi-block ghash operations */ |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2780 |
address generate_ghash_processBlocks() { |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2781 |
assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support"); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2782 |
__ align(CodeEntryAlignment); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2783 |
Label L_ghash_loop, L_exit; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2784 |
StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2785 |
address start = __ pc(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2786 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2787 |
const Register state = rdi; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2788 |
const Register subkeyH = rsi; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2789 |
const Register data = rdx; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2790 |
const Register blocks = rcx; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2791 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2792 |
const Address state_param(rbp, 8+0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2793 |
const Address subkeyH_param(rbp, 8+4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2794 |
const Address data_param(rbp, 8+8); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2795 |
const Address blocks_param(rbp, 8+12); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2796 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2797 |
const XMMRegister xmm_temp0 = xmm0; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2798 |
const XMMRegister xmm_temp1 = xmm1; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2799 |
const XMMRegister xmm_temp2 = xmm2; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2800 |
const XMMRegister xmm_temp3 = xmm3; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2801 |
const XMMRegister xmm_temp4 = xmm4; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2802 |
const XMMRegister xmm_temp5 = xmm5; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2803 |
const XMMRegister xmm_temp6 = xmm6; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2804 |
const XMMRegister xmm_temp7 = xmm7; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2805 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2806 |
__ enter(); |
31771
c9f593020799
8130341: GHASH 32bit intrinsics has AEADBadTagException
ascarpino
parents:
31404
diff
changeset
|
2807 |
handleSOERegisters(true); // Save registers |
31404
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2808 |
|
32727
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2809 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2810 |
// context for the registers used, where all instructions below are using 128-bit mode |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2811 |
// On EVEX without VL and BW, these instructions will all be AVX. |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2812 |
if (VM_Version::supports_avx512vlbw()) { |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2813 |
__ movl(rdx, 0xffff); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2814 |
__ kmovdl(k1, rdx); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2815 |
} |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2816 |
|
31404
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2817 |
__ movptr(state, state_param); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2818 |
__ movptr(subkeyH, subkeyH_param); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2819 |
__ movptr(data, data_param); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2820 |
__ movptr(blocks, blocks_param); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2821 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2822 |
__ movdqu(xmm_temp0, Address(state, 0)); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2823 |
__ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2824 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2825 |
__ movdqu(xmm_temp1, Address(subkeyH, 0)); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2826 |
__ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2827 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2828 |
__ BIND(L_ghash_loop); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2829 |
__ movdqu(xmm_temp2, Address(data, 0)); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2830 |
__ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2831 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2832 |
__ pxor(xmm_temp0, xmm_temp2); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2833 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2834 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2835 |
// Multiply with the hash key |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2836 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2837 |
__ movdqu(xmm_temp3, xmm_temp0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2838 |
__ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2839 |
__ movdqu(xmm_temp4, xmm_temp0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2840 |
__ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2841 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2842 |
__ movdqu(xmm_temp5, xmm_temp0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2843 |
__ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2844 |
__ movdqu(xmm_temp6, xmm_temp0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2845 |
__ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2846 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2847 |
__ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2848 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2849 |
__ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2850 |
__ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2851 |
__ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2852 |
__ pxor(xmm_temp3, xmm_temp5); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2853 |
__ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2854 |
// of the carry-less multiplication of |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2855 |
// xmm0 by xmm1. |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2856 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2857 |
// We shift the result of the multiplication by one bit position |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2858 |
// to the left to cope for the fact that the bits are reversed. |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2859 |
__ movdqu(xmm_temp7, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2860 |
__ movdqu(xmm_temp4, xmm_temp6); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2861 |
__ pslld (xmm_temp3, 1); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2862 |
__ pslld(xmm_temp6, 1); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2863 |
__ psrld(xmm_temp7, 31); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2864 |
__ psrld(xmm_temp4, 31); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2865 |
__ movdqu(xmm_temp5, xmm_temp7); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2866 |
__ pslldq(xmm_temp4, 4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2867 |
__ pslldq(xmm_temp7, 4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2868 |
__ psrldq(xmm_temp5, 12); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2869 |
__ por(xmm_temp3, xmm_temp7); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2870 |
__ por(xmm_temp6, xmm_temp4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2871 |
__ por(xmm_temp6, xmm_temp5); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2872 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2873 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2874 |
// First phase of the reduction |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2875 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2876 |
// Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2877 |
// independently. |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2878 |
__ movdqu(xmm_temp7, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2879 |
__ movdqu(xmm_temp4, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2880 |
__ movdqu(xmm_temp5, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2881 |
__ pslld(xmm_temp7, 31); // packed right shift shifting << 31 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2882 |
__ pslld(xmm_temp4, 30); // packed right shift shifting << 30 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2883 |
__ pslld(xmm_temp5, 25); // packed right shift shifting << 25 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2884 |
__ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2885 |
__ pxor(xmm_temp7, xmm_temp5); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2886 |
__ movdqu(xmm_temp4, xmm_temp7); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2887 |
__ pslldq(xmm_temp7, 12); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2888 |
__ psrldq(xmm_temp4, 4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2889 |
__ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2890 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2891 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2892 |
// Second phase of the reduction |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2893 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2894 |
// Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2895 |
// shift operations. |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2896 |
__ movdqu(xmm_temp2, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2897 |
__ movdqu(xmm_temp7, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2898 |
__ movdqu(xmm_temp5, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2899 |
__ psrld(xmm_temp2, 1); // packed left shifting >> 1 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2900 |
__ psrld(xmm_temp7, 2); // packed left shifting >> 2 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2901 |
__ psrld(xmm_temp5, 7); // packed left shifting >> 7 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2902 |
__ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2903 |
__ pxor(xmm_temp2, xmm_temp5); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2904 |
__ pxor(xmm_temp2, xmm_temp4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2905 |
__ pxor(xmm_temp3, xmm_temp2); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2906 |
__ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2907 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2908 |
__ decrement(blocks); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2909 |
__ jcc(Assembler::zero, L_exit); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2910 |
__ movdqu(xmm_temp0, xmm_temp6); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2911 |
__ addptr(data, 16); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2912 |
__ jmp(L_ghash_loop); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2913 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2914 |
__ BIND(L_exit); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2915 |
// Byte swap 16-byte result |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2916 |
__ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2917 |
__ movdqu(Address(state, 0), xmm_temp6); // store the result |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2918 |
|
31771
c9f593020799
8130341: GHASH 32bit intrinsics has AEADBadTagException
ascarpino
parents:
31404
diff
changeset
|
2919 |
handleSOERegisters(false); // restore registers |
31404
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2920 |
__ leave(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2921 |
__ ret(0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2922 |
return start; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2923 |
} |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
2924 |
|
18507
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2925 |
/** |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2926 |
* Arguments: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2927 |
* |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2928 |
* Inputs: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2929 |
* rsp(4) - int crc |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2930 |
* rsp(8) - byte* buf |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2931 |
* rsp(12) - int length |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2932 |
* |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2933 |
* Ouput: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2934 |
* rax - int crc result |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2935 |
*/ |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2936 |
address generate_updateBytesCRC32() { |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2937 |
assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2938 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2939 |
__ align(CodeEntryAlignment); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2940 |
StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2941 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2942 |
address start = __ pc(); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2943 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2944 |
const Register crc = rdx; // crc |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2945 |
const Register buf = rsi; // source java byte array address |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2946 |
const Register len = rcx; // length |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2947 |
const Register table = rdi; // crc_table address (reuse register) |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2948 |
const Register tmp = rbx; |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2949 |
assert_different_registers(crc, buf, len, table, tmp, rax); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2950 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2951 |
BLOCK_COMMENT("Entry:"); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2952 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2953 |
__ push(rsi); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2954 |
__ push(rdi); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2955 |
__ push(rbx); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2956 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2957 |
Address crc_arg(rbp, 8 + 0); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2958 |
Address buf_arg(rbp, 8 + 4); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2959 |
Address len_arg(rbp, 8 + 8); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2960 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2961 |
// Load up: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2962 |
__ movl(crc, crc_arg); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2963 |
__ movptr(buf, buf_arg); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2964 |
__ movl(len, len_arg); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2965 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2966 |
__ kernel_crc32(crc, buf, len, table, tmp); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2967 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2968 |
__ movl(rax, crc); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2969 |
__ pop(rbx); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2970 |
__ pop(rdi); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2971 |
__ pop(rsi); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2972 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2973 |
__ ret(0); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2974 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2975 |
return start; |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2976 |
} |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
2977 |
|
33066 | 2978 |
/** |
2979 |
* Arguments: |
|
2980 |
* |
|
2981 |
* Inputs: |
|
2982 |
* rsp(4) - int crc |
|
2983 |
* rsp(8) - byte* buf |
|
2984 |
* rsp(12) - int length |
|
2985 |
* rsp(16) - table_start - optional (present only when doing a library_calll, |
|
2986 |
* not used by x86 algorithm) |
|
2987 |
* |
|
2988 |
* Ouput: |
|
2989 |
* rax - int crc result |
|
2990 |
*/ |
|
2991 |
address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) { |
|
2992 |
assert(UseCRC32CIntrinsics, "need SSE4_2"); |
|
2993 |
__ align(CodeEntryAlignment); |
|
2994 |
StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); |
|
2995 |
address start = __ pc(); |
|
2996 |
const Register crc = rax; // crc |
|
2997 |
const Register buf = rcx; // source java byte array address |
|
2998 |
const Register len = rdx; // length |
|
2999 |
const Register d = rbx; |
|
3000 |
const Register g = rsi; |
|
3001 |
const Register h = rdi; |
|
3002 |
const Register empty = 0; // will never be used, in order not |
|
3003 |
// to change a signature for crc32c_IPL_Alg2_Alt2 |
|
3004 |
// between 64/32 I'm just keeping it here |
|
3005 |
assert_different_registers(crc, buf, len, d, g, h); |
|
3006 |
||
3007 |
BLOCK_COMMENT("Entry:"); |
|
3008 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
3009 |
Address crc_arg(rsp, 4 + 4 + 0); // ESP+4 + |
|
3010 |
// we need to add additional 4 because __ enter |
|
3011 |
// have just pushed ebp on a stack |
|
3012 |
Address buf_arg(rsp, 4 + 4 + 4); |
|
3013 |
Address len_arg(rsp, 4 + 4 + 8); |
|
3014 |
// Load up: |
|
3015 |
__ movl(crc, crc_arg); |
|
3016 |
__ movl(buf, buf_arg); |
|
3017 |
__ movl(len, len_arg); |
|
3018 |
__ push(d); |
|
3019 |
__ push(g); |
|
3020 |
__ push(h); |
|
3021 |
__ crc32c_ipl_alg2_alt2(crc, buf, len, |
|
3022 |
d, g, h, |
|
3023 |
empty, empty, empty, |
|
3024 |
xmm0, xmm1, xmm2, |
|
3025 |
is_pclmulqdq_supported); |
|
3026 |
__ pop(h); |
|
3027 |
__ pop(g); |
|
3028 |
__ pop(d); |
|
3029 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
|
3030 |
__ ret(0); |
|
3031 |
||
3032 |
return start; |
|
3033 |
} |
|
3034 |
||
33089 | 3035 |
/**
 * Emits the libm "exp" stub and returns its entry address.
 *
 * The body is produced entirely by MacroAssembler::fast_exp, which is given
 * xmm0-xmm7 plus rax, rcx, rdx and rbx to work with. Where the argument and
 * the result live is defined by fast_exp and is not visible here.
 */
address generate_libmExp() {
  // Declare a stub mark like the other generators in this file do, so this
  // stub is described under a group and name instead of being anonymous code.
  StubCodeMark mark(this, "StubRoutines", "libmExp");

  address start = __ pc();

  const Register tmp = rbx;

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ fast_exp(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
              rax, rcx, rdx, tmp);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
3059 |
||
33465 | 3060 |
/**
 * Emits the libm "log" stub and returns its entry address.
 *
 * The body is produced entirely by MacroAssembler::fast_log, which is given
 * xmm0-xmm7 plus rax, rcx, rdx and rbx to work with. Where the argument and
 * the result live is defined by fast_log and is not visible here.
 */
address generate_libmLog() {
  // Declare a stub mark like the other generators in this file do, so this
  // stub is described under a group and name instead of being anonymous code.
  StubCodeMark mark(this, "StubRoutines", "libmLog");

  address start = __ pc();

  const Register tmp = rbx;

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ fast_log(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
              rax, rcx, rdx, tmp);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
3084 |
||
3085 |
||
33089 | 3086 |
|
18740 | 3087 |
// Safefetch stubs. |
3088 |
void generate_safefetch(const char* name, int size, address* entry, |
|
3089 |
address* fault_pc, address* continuation_pc) { |
|
3090 |
// safefetch signatures: |
|
3091 |
// int SafeFetch32(int* adr, int errValue); |
|
3092 |
// intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); |
|
3093 |
||
3094 |
StubCodeMark mark(this, "StubRoutines", name); |
|
3095 |
||
3096 |
// Entry point, pc or function descriptor. |
|
3097 |
*entry = __ pc(); |
|
3098 |
||
3099 |
__ movl(rax, Address(rsp, 0x8)); |
|
3100 |
__ movl(rcx, Address(rsp, 0x4)); |
|
3101 |
// Load *adr into eax, may fault. |
|
3102 |
*fault_pc = __ pc(); |
|
3103 |
switch (size) { |
|
3104 |
case 4: |
|
3105 |
// int32_t |
|
3106 |
__ movl(rax, Address(rcx, 0)); |
|
3107 |
break; |
|
3108 |
case 8: |
|
3109 |
// int64_t |
|
3110 |
Unimplemented(); |
|
3111 |
break; |
|
3112 |
default: |
|
3113 |
ShouldNotReachHere(); |
|
3114 |
} |
|
3115 |
||
3116 |
// Return errValue or *adr. |
|
3117 |
*continuation_pc = __ pc(); |
|
3118 |
__ ret(0); |
|
3119 |
} |
|
14132 | 3120 |
|
1 | 3121 |
public: |
3122 |
// Frame layout at the time of a blocking runtime call, expressed as word
// indices counted upwards from rsp.
// Only callee-saved registers need a slot here: the compilers are responsible
// for supplying a continuation point if they expect all registers to be
// preserved.
enum layout {
  thread_off = 0,   // lowest slot, where last_java_sp points; holds the thread argument
  arg1_off   = 1,   // optional first extra argument
  arg2_off   = 2,   // optional second extra argument
  rbp_off    = 3,   // callee saved register
  ret_pc     = 4,   // return address
  framesize  = 5    // total number of words in the frame
};
|
3134 |
||
3135 |
private: |
|
3136 |
||
3137 |
#undef __ |
|
3138 |
#define __ masm-> |
|
3139 |
||
3140 |
//------------------------------------------------------------------------------------------------------------------------ |
|
3141 |
// Continuation point for throwing of implicit exceptions that are not handled in |
|
3142 |
// the current activation. Fabricates an exception oop and initiates normal |
|
3143 |
// exception dispatching in this frame. |
|
3144 |
// |
|
3145 |
// Previously the compiler (c2) allowed for callee save registers on Java calls. |
|
3146 |
// This is no longer true after adapter frames were removed but could possibly |
|
3147 |
// be brought back in the future if the interpreter code was reworked and it |
|
3148 |
// was deemed worthwhile. The comment below was left to describe what must |
|
3149 |
// happen here if callee saves were resurrected. As it stands now this stub |
|
3150 |
// could actually be a vanilla BufferBlob and have now oopMap at all. |
|
3151 |
// Since it doesn't make much difference we've chosen to leave it the |
|
3152 |
// way it was in the callee save days and keep the comment. |
|
3153 |
||
3154 |
// If we need to preserve callee-saved values we need a callee-saved oop map and |
|
3155 |
// therefore have to make these stubs into RuntimeStubs rather than BufferBlobs. |
|
3156 |
// If the compiler needs all registers to be preserved between the fault |
|
3157 |
// point and the exception handler then it must assume responsibility for that in |
|
3158 |
// AbstractCompiler::continuation_for_implicit_null_exception or |
|
3159 |
// continuation_for_implicit_division_by_zero_exception. All other implicit |
|
3160 |
// exceptions (e.g., NullPointerException or AbstractMethodError on entry) are |
|
3161 |
// either at call sites or otherwise assume that stack unwinding will be initiated, |
|
3162 |
// so caller saved registers were assumed volatile in the compiler. |
|
3163 |
address generate_throw_exception(const char* name, address runtime_entry, |
|
10545 | 3164 |
Register arg1 = noreg, Register arg2 = noreg) { |
1 | 3165 |
|
3166 |
int insts_size = 256; |
|
3167 |
int locs_size = 32; |
|
3168 |
||
3169 |
CodeBuffer code(name, insts_size, locs_size); |
|
3170 |
OopMapSet* oop_maps = new OopMapSet(); |
|
3171 |
MacroAssembler* masm = new MacroAssembler(&code); |
|
3172 |
||
3173 |
address start = __ pc(); |
|
3174 |
||
3175 |
// This is an inlined and slightly modified version of call_VM |
|
3176 |
// which has the ability to fetch the return PC out of |
|
3177 |
// thread-local storage and also sets up last_Java_sp slightly |
|
3178 |
// differently than the real call_VM |
|
3179 |
Register java_thread = rbx; |
|
3180 |
__ get_thread(java_thread); |
|
3181 |
||
3182 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
3183 |
||
3184 |
// pc and rbp, already pushed |
|
1066 | 3185 |
__ subptr(rsp, (framesize-2) * wordSize); // prolog |
1 | 3186 |
|
3187 |
// Frame is now completed as far as size and linkage. |
|
3188 |
||
3189 |
int frame_complete = __ pc() - start; |
|
3190 |
||
3191 |
// push java thread (becomes first argument of C function) |
|
1066 | 3192 |
__ movptr(Address(rsp, thread_off * wordSize), java_thread); |
10004
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3193 |
if (arg1 != noreg) { |
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3194 |
__ movptr(Address(rsp, arg1_off * wordSize), arg1); |
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3195 |
} |
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3196 |
if (arg2 != noreg) { |
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3197 |
assert(arg1 != noreg, "missing reg arg"); |
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3198 |
__ movptr(Address(rsp, arg2_off * wordSize), arg2); |
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3199 |
} |
1 | 3200 |
|
3201 |
// Set up last_Java_sp and last_Java_fp |
|
3202 |
__ set_last_Java_frame(java_thread, rsp, rbp, NULL); |
|
3203 |
||
3204 |
// Call runtime |
|
3205 |
BLOCK_COMMENT("call runtime_entry"); |
|
3206 |
__ call(RuntimeAddress(runtime_entry)); |
|
3207 |
// Generate oop map |
|
3208 |
OopMap* map = new OopMap(framesize, 0); |
|
3209 |
oop_maps->add_gc_map(__ pc() - start, map); |
|
3210 |
||
3211 |
// restore the thread (cannot use the pushed argument since arguments |
|
3212 |
// may be overwritten by C code generated by an optimizing compiler); |
|
3213 |
// however can use the register value directly if it is callee saved. |
|
3214 |
__ get_thread(java_thread); |
|
3215 |
||
3216 |
__ reset_last_Java_frame(java_thread, true, false); |
|
3217 |
||
3218 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
|
3219 |
||
3220 |
// check for pending exceptions |
|
3221 |
#ifdef ASSERT |
|
3222 |
Label L; |
|
1066 | 3223 |
__ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); |
1 | 3224 |
__ jcc(Assembler::notEqual, L); |
3225 |
__ should_not_reach_here(); |
|
3226 |
__ bind(L); |
|
3227 |
#endif /* ASSERT */ |
|
3228 |
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); |
|
3229 |
||
3230 |
||
3231 |
RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false); |
|
3232 |
return stub->entry_point(); |
|
3233 |
} |
|
3234 |
||
3235 |
||
3236 |
// Fills in the platform-dependent FPU/SSE control constants kept in
// StubRoutines. Pure data initialization; no code is emitted here.
void create_control_words() {
  // x87 control words. All of them have the exception mask bits set (low
  // byte 0x7F); they differ in rounding and precision control.
  StubRoutines::_fpu_cntrl_wrd_24    = 0x007F;  // round to nearest, 24-bit mode
  StubRoutines::_fpu_cntrl_wrd_std   = 0x027F;  // round to nearest, 53-bit mode
  StubRoutines::_fpu_cntrl_wrd_64    = 0x037F;  // round to nearest, 64-bit mode
  // Round to zero, labelled "53-bit mode" in the original comment.
  // NOTE(review): read with the usual x87 layout (precision control in bits
  // 8-9, rounding control in bits 10-11) 0x0D7F has a different precision
  // field than 0x027F above. Value left unchanged; confirm against the
  // Intel SDM before relying on the 53-bit label.
  StubRoutines::_fpu_cntrl_wrd_trunc = 0x0D7F;

  // SSE MXCSR: round to nearest, exceptions masked.
  StubRoutines::_mxcsr_std           = 0x1F80;

  // The two constants below are 80-bit extended values stored as three
  // words: [0] low mantissa word, [1] high mantissa word, [2] sign/exponent.
  // The layout is critical for correct loading by the FPU.

  // Bias for strict fp multiply/divide:
  // 2^(-15360) == 0x03ff 8000 0000 0000 0000
  StubRoutines::_fpu_subnormal_bias1[0] = 0x00000000;
  StubRoutines::_fpu_subnormal_bias1[1] = 0x80000000;
  StubRoutines::_fpu_subnormal_bias1[2] = 0x03ff;

  // Un-bias for strict fp multiply/divide:
  // 2^(+15360) == 0x7bff 8000 0000 0000 0000
  StubRoutines::_fpu_subnormal_bias2[0] = 0x00000000;
  StubRoutines::_fpu_subnormal_bias2[1] = 0x80000000;
  StubRoutines::_fpu_subnormal_bias2[2] = 0x7bff;
}
|
3258 |
||
3259 |
//--------------------------------------------------------------------------- |
|
3260 |
// Initialization |
|
3261 |
||
3262 |
// Generates the first group of stubs and stores each resulting entry point
// in StubRoutines. Called from the StubGenerator constructor when its `all`
// argument is false. The statements below emit code in sequence, so their
// order is significant and must be kept.
void generate_initial() { |

3263 |
// Generates the initial subset of stubs and initializes their entry points |

3264 |
||
3265 |
//------------------------------------------------------------------------------------------------------------------------ |

3266 |
// entry points that exist in all platforms |

3267 |
// Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than |

3268 |
// the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp. |

3269 |
StubRoutines::_forward_exception_entry = generate_forward_exception(); |

3270 |
||
3271 |
// generate_call_stub() also receives _call_stub_return_address by reference
// (presumably filled in as an out-parameter -- confirm against its definition).
StubRoutines::_call_stub_entry = |

3272 |
generate_call_stub(StubRoutines::_call_stub_return_address); |

3273 |
// is referenced by megamorphic call |

3274 |
StubRoutines::_catch_exception_entry = generate_catch_exception(); |

3275 |
||
3276 |
// These are currently used by Solaris/Intel |

3277 |
StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); |

3278 |
||
3279 |
StubRoutines::_handler_for_unsafe_access_entry = |

3280 |
generate_handler_for_unsafe_access(); |

3281 |
||
3282 |
// platform dependent |

3283 |
create_control_words(); |

3284 |
||
1066 | 3285 |
StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr(); |
3286 |
StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = generate_verify_fpu_cntrl_wrd(); |

1 | 3287 |
// Both the d2i and the d2l wrapper come from generate_d2i_wrapper(); only the
// result type tag and the SharedRuntime fallback function differ.
StubRoutines::_d2i_wrapper = generate_d2i_wrapper(T_INT, |
3288 |
CAST_FROM_FN_PTR(address, SharedRuntime::d2i)); |

3289 |
StubRoutines::_d2l_wrapper = generate_d2i_wrapper(T_LONG, |

3290 |
CAST_FROM_FN_PTR(address, SharedRuntime::d2l)); |

10004
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3291 |

190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3292 |
// Build this early so it's available for the interpreter
35071
a0910b1d3e0d
8046936: JEP 270: Reserved Stack Areas for Critical Sections
fparain
parents:
33465
diff
changeset
|
3293 |
StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception",
a0910b1d3e0d
8046936: JEP 270: Reserved Stack Areas for Critical Sections
fparain
parents:
33465
diff
changeset
|
3294 |
CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
a0910b1d3e0d
8046936: JEP 270: Reserved Stack Areas for Critical Sections
fparain
parents:
33465
diff
changeset
|
3295 |
StubRoutines::_throw_delayed_StackOverflowError_entry = generate_throw_exception("delayed StackOverflowError throw_exception",
a0910b1d3e0d
8046936: JEP 270: Reserved Stack Areas for Critical Sections
fparain
parents:
33465
diff
changeset
|
3296 |
CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError));
18507
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3297 |

61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3298 |
if (UseCRC32Intrinsics) {
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3299 |
// set the table address before generating the stub that uses it
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3300 |
StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3301 |
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3302 |
}
33066 | 3303 |

3304 |
// CRC32C: generate the table first, publish its address, then generate the
// stub. The same supports_clmul value is passed to both the table generator
// and the stub generator.
if (UseCRC32CIntrinsics) { |

3305 |
bool supports_clmul = VM_Version::supports_clmul(); |

3306 |
StubRoutines::x86::generate_CRC32C_table(supports_clmul); |

3307 |
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table; |

3308 |
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul); |

3309 |
} |

33089 | 3310 |
// The libm exp/log stubs are generated only when the CPU reports SSE2 support;
// otherwise _dexp and _dlog are not assigned here.
if (VM_Version::supports_sse2()) { |
3311 |
StubRoutines::_dexp = generate_libmExp(); |

33465 | 3312 |
StubRoutines::_dlog = generate_libmLog(); |
33089 | 3313 |
} |
1 | 3314 |
} |
3315 |
||
3316 |
||
3317 |
// Generates the second group of stubs and stores each resulting entry point
// in StubRoutines. Called from the StubGenerator constructor when its `all`
// argument is true. Despite the name, this function does not call
// generate_initial(); the stubs listed there are not produced here.
void generate_all() { |

3318 |
// Generates the stubs not covered by generate_initial() and initializes their entry points |

3319 |
||
3320 |
// These entry points require SharedInfo::stack0 to be set up in non-core builds |

3321 |
// and need to be relocatable, so they each fabricate a RuntimeStub internally. |

10545 | 3322 |
StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError)); |
3323 |
StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError)); |

3324 |
StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call)); |

1 | 3325 |

3326 |
//------------------------------------------------------------------------------------------------------------------------ |

3327 |
// entry points that are platform specific |

3328 |
||
3329 |
// support for verify_oop (must happen after universe_init) |

3330 |
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); |

3331 |
||
3332 |
// arraycopy stubs used by compilers |

3333 |
generate_arraycopy_stubs(); |

2534 | 3334 |

4645
0c5f5b94e93a
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
3262
diff
changeset
|
3335 |
generate_math_stubs();
14132 | 3336 |

3337 |
// don't bother generating these AES intrinsic stubs unless global flag is set |

3338 |
if (UseAESIntrinsics) { |

3339 |
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others |

3340 |
||
3341 |
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); |

3342 |
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); |

3343 |
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); |

3344 |
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); |

3345 |
} |

18740 | 3346 |

31404
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3347 |
// Generate GHASH intrinsics code
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3348 |
if (UseGHASHIntrinsics) {
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3349 |
// The two swap-mask addresses are set up before the processBlocks stub is
// generated (presumably because that stub reads them -- confirm).
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3350 |
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3351 |
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3352 |
}
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3353 |

18740 | 3354 |
// Safefetch stubs. |
3355 |
// generate_safefetch() is handed the addresses of the three SafeFetch32 fields
// (entry, fault pc, continuation pc) together with the access size sizeof(int).
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, |

3356 |
&StubRoutines::_safefetch32_fault_pc, |

3357 |
&StubRoutines::_safefetch32_continuation_pc); |

3358 |
// No separate SafeFetchN stub is generated: the N variants simply alias the
// SafeFetch32 entry, fault pc and continuation pc (presumably because a native
// word is 32 bits on this port -- confirm).
StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry; |

3359 |
StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc; |

3360 |
StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc; |

1 | 3361 |
} |
3362 |
||
3363 |
||
3364 |
public: |
|
3365 |
// Constructs the generator on top of StubCodeGenerator(code) and immediately
// runs exactly one generation pass: generate_all() when `all` is true,
// generate_initial() otherwise. The two passes are mutually exclusive per
// constructed object.
StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { |

3366 |
if (all) { |

3367 |
generate_all(); |

3368 |
} else { |

3369 |
generate_initial(); |

3370 |
} |

3371 |
} |
|
3372 |
}; // end class declaration |
|
3373 |
||
3374 |
||
3375 |
// Free-function entry point for stub generation. All the work is done by the
// StubGenerator constructor; the local object `g` exists only to run it and
// is destroyed on return. `code` and `all` are forwarded unchanged.
void StubGenerator_generate(CodeBuffer* code, bool all) { |

3376 |
StubGenerator g(code, all); |

3377 |
} |