author | duke |
Wed, 05 Jul 2017 23:10:03 +0200 | |
changeset 44509 | 02253db2ace1 |
parent 42618 | 08162de8f053 |
child 46440 | 61025eecb743 |
permissions | -rw-r--r-- |
1 | 1 |
/* |
38209
b2a58604e046
8156088: Reintegrate 8153892: Handle unsafe access error directly in signal handler instead of going through a stub
mikael
parents:
38190
diff
changeset
|
2 |
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved. |
1 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
7 |
* published by the Free Software Foundation. |
|
8 |
* |
|
9 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
13 |
* accompanied this code). |
|
14 |
* |
|
15 |
* You should have received a copy of the GNU General Public License version |
|
16 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
17 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 |
* |
|
5547
f4b087cbb361
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
5419
diff
changeset
|
19 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
f4b087cbb361
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
5419
diff
changeset
|
20 |
* or visit www.oracle.com if you need additional information or have any |
f4b087cbb361
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
5419
diff
changeset
|
21 |
* questions. |
1 | 22 |
* |
23 |
*/ |
|
24 |
||
7397 | 25 |
#include "precompiled.hpp" |
14626
0cf4eccf130f
8003240: x86: move MacroAssembler into separate file
twisti
parents:
14132
diff
changeset
|
26 |
#include "asm/macroAssembler.hpp" |
0cf4eccf130f
8003240: x86: move MacroAssembler into separate file
twisti
parents:
14132
diff
changeset
|
27 |
#include "asm/macroAssembler.inline.hpp" |
7397 | 28 |
#include "interpreter/interpreter.hpp" |
29 |
#include "nativeInst_x86.hpp" |
|
30 |
#include "oops/instanceOop.hpp" |
|
13728
882756847a04
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
13391
diff
changeset
|
31 |
#include "oops/method.hpp" |
7397 | 32 |
#include "oops/objArrayKlass.hpp" |
33 |
#include "oops/oop.inline.hpp" |
|
34 |
#include "prims/methodHandles.hpp" |
|
35 |
#include "runtime/frame.inline.hpp" |
|
36 |
#include "runtime/handles.inline.hpp" |
|
37 |
#include "runtime/sharedRuntime.hpp" |
|
38 |
#include "runtime/stubCodeGenerator.hpp" |
|
39 |
#include "runtime/stubRoutines.hpp" |
|
14583
d70ee55535f4
8003935: Simplify the needed includes for using Thread::current()
stefank
parents:
14132
diff
changeset
|
40 |
#include "runtime/thread.inline.hpp" |
7397 | 41 |
#ifdef COMPILER2 |
42 |
#include "opto/runtime.hpp" |
|
43 |
#endif |
|
1 | 44 |
|
45 |
// Declaration and definition of StubGenerator (no .hpp file). |
|
46 |
// For a more detailed description of the stub routine structure |
|
47 |
// see the comment in stubRoutines.hpp |
|
48 |
||
49 |
#define __ _masm-> |
|
1066 | 50 |
#define a__ ((Assembler*)_masm)-> |
1 | 51 |
|
52 |
#ifdef PRODUCT |
|
53 |
#define BLOCK_COMMENT(str) /* nothing */ |
|
54 |
#else |
|
55 |
#define BLOCK_COMMENT(str) __ block_comment(str) |
|
56 |
#endif |
|
57 |
||
58 |
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") |
|
59 |
||
60 |
const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions |
|
61 |
const int FPU_CNTRL_WRD_MASK = 0xFFFF; |
|
62 |
||
63 |
// ------------------------------------------------------------------------------------------------------------------------- |
|
64 |
// Stub Code definitions |
|
65 |
||
66 |
class StubGenerator: public StubCodeGenerator { |
|
67 |
private: |
|
68 |
||
69 |
#ifdef PRODUCT |
|
18073
f02460441ddc
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
17622
diff
changeset
|
70 |
#define inc_counter_np(counter) ((void)0) |
1 | 71 |
#else |
72 |
void inc_counter_np_(int& counter) { |
|
1066 | 73 |
__ incrementl(ExternalAddress((address)&counter)); |
1 | 74 |
} |
75 |
#define inc_counter_np(counter) \ |
|
76 |
BLOCK_COMMENT("inc_counter " #counter); \ |
|
77 |
inc_counter_np_(counter); |
|
78 |
#endif //PRODUCT |
|
79 |
||
80 |
// Emits an increment of the SharedRuntime array-copy counter that matches
// element type t. The body is compiled out of PRODUCT builds. Any type other
// than T_BYTE, T_SHORT, T_INT, T_LONG or T_OBJECT reaches ShouldNotReachHere().
void inc_copy_counter_np(BasicType t) {
#ifndef PRODUCT
  switch (t) {
    case T_BYTE:
      inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr);
      break;
    case T_SHORT:
      inc_counter_np(SharedRuntime::_jshort_array_copy_ctr);
      break;
    case T_INT:
      inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
      break;
    case T_LONG:
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
      break;
    case T_OBJECT:
      inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
      break;
    default:
      ShouldNotReachHere();
      break;
  }
#endif // PRODUCT
}
|
92 |
||
93 |
//------------------------------------------------------------------------------------------------------------------------ |
|
94 |
// Call stubs are used to call Java from C |
|
95 |
// |
|
96 |
// [ return_from_Java ] <--- rsp |
|
97 |
// [ argument word n ] |
|
98 |
// ... |
|
99 |
// -N [ argument word 1 ] |
|
100 |
// -7 [ Possible padding for stack alignment ] |
|
101 |
// -6 [ Possible padding for stack alignment ] |
|
102 |
// -5 [ Possible padding for stack alignment ] |
|
103 |
// -4 [ mxcsr save ] <--- rsp_after_call |
|
104 |
// -3 [ saved rbx ] |
|
105 |
// -2 [ saved rsi ] |
|
106 |
// -1 [ saved rdi ] |
|
107 |
// 0 [ saved rbp ] <--- rbp |
|
108 |
// 1 [ return address ] |
|
109 |
// 2 [ ptr. to call wrapper ] |
|
110 |
// 3 [ result ] |
|
111 |
// 4 [ result_type ] |
|
112 |
// 5 [ method ] |
|
113 |
// 6 [ entry_point ] |
|
114 |
// 7 [ parameters ] |
|
115 |
// 8 [ parameter_size ] |
|
116 |
// 9 [ thread ] |
|
117 |
||
118 |
||
119 |
// Generates the call stub, the code used to call Java from C.
//
//   return_address - out parameter; receives the pc immediately following the
//                    emitted call into the Java entry point.
//   returns        - the start address of the generated stub.
//
// All frame slots below are rbp-relative. Negative offsets are the stub's own
// save area, positive offsets are the caller-supplied arguments.
address generate_call_stub(address& return_address) {
  StubCodeMark mark(this, "StubRoutines", "call_stub");
  address start = __ pc();

  assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code");

  // %mxcsr only needs saving/restoring when SSE is in use.
  const bool sse_save = UseSSE > 0;

  // Save area: four words (mxcsr, rbx, rsi, rdi) directly below the saved rbp.
  const int     locals_count_in_bytes(4 * wordSize);
  const Address rsp_after_call(rbp, -4 * wordSize);  // must match generate_catch_exception()
  const Address mxcsr_save    (rbp, -4 * wordSize);
  const Address saved_rbx     (rbp, -3 * wordSize);
  const Address saved_rsi     (rbp, -2 * wordSize);
  const Address saved_rdi     (rbp, -1 * wordSize);

  // Incoming arguments.
  const Address result        (rbp,  3 * wordSize);
  const Address result_type   (rbp,  4 * wordSize);
  const Address method        (rbp,  5 * wordSize);
  const Address entry_point   (rbp,  6 * wordSize);
  const Address parameters    (rbp,  7 * wordSize);
  const Address parameter_size(rbp,  8 * wordSize);
  const Address thread        (rbp,  9 * wordSize);  // must match generate_catch_exception()

  // ---- prologue: build the frame and carve out stack space ----
  __ enter();
  __ movptr(rcx, parameter_size);                     // number of parameter words
  __ shlptr(rcx, Interpreter::logStackElementSize);   // ... converted to bytes
  __ addptr(rcx, locals_count_in_bytes);              // ... plus the register save area
  __ subptr(rsp, rcx);
  __ andptr(rsp, -(StackAlignmentInBytes));           // keep the stack aligned

  // rdi, rsi and rbx must be preserved per the C calling convention.
  __ movptr(saved_rdi, rdi);
  __ movptr(saved_rsi, rsi);
  __ movptr(saved_rbx, rbx);

  // Provide the initial value for the k1 mask register when UseAVX > 2.
  if (UseAVX > 2) {
    __ movl(rbx, 0xffff);
    __ kmovwl(k1, rbx);
  }

  // Save %mxcsr, and reload the standard value if the control/mask bits differ.
  if (sse_save) {
    Label L_mxcsr_ok;
    ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
    __ stmxcsr(mxcsr_save);
    __ movl(rax, mxcsr_save);
    __ andl(rax, MXCSR_MASK);                         // compare control and mask bits only
    __ cmp32(rax, mxcsr_std);
    __ jcc(Assembler::equal, L_mxcsr_ok);
    __ ldmxcsr(mxcsr_std);
    __ bind(L_mxcsr_ok);
  }

  // Force the standard x87 control word.
  const ExternalAddress fpu_cntrl_wrd_std(StubRoutines::addr_fpu_cntrl_wrd_std());
  __ fldcw(fpu_cntrl_wrd_std);

#ifdef ASSERT
  // The stub must not be entered while an exception is already pending.
  {
    Label L_no_pending;
    __ movptr(rcx, thread);
    __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
    __ jcc(Assembler::equal, L_no_pending);
    __ stop("StubRoutines::call_stub: entered with pending exception");
    __ bind(L_no_pending);
  }
#endif

  // ---- copy the Java parameters onto the stack (skipped when there are none) ----
  BLOCK_COMMENT("pass parameters if any");
  Label parameters_done;
  __ movl(rcx, parameter_size);                       // remaining parameter count
  __ testl(rcx, rcx);
  __ jcc(Assembler::zero, parameters_done);

  // Parameters are copied in reverse order (receiver last), so the argument
  // order is inverted in the process:
  //   source is rdx[rcx: N-1..0]
  //   dest   is rsp[rbx: 0..N-1]
  Label loop;
  __ movptr(rdx, parameters);                         // base of the parameter array
  __ xorptr(rbx, rbx);                                // destination index starts at 0

  __ BIND(loop);
  // Load one parameter ...
  __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize));
  // ... and store it into its expression-stack slot.
  __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
                    Interpreter::expr_offset_in_bytes(0)), rax);
  __ increment(rbx);
  __ decrement(rcx);
  __ jcc(Assembler::notZero, loop);

  // ---- call the Java entry point ----
  __ BIND(parameters_done);
  __ movptr(rbx, method);                             // Method*
  __ movptr(rax, entry_point);                        // entry point to call
  __ mov(rsi, rsp);                                   // sender sp
  BLOCK_COMMENT("call Java function");
  __ call(rax);

  BLOCK_COMMENT("call_stub_return_address:");
  return_address = __ pc();

#ifdef COMPILER2
  {
    Label L_skip;
    if (UseSSE >= 2) {
      __ verify_FPU(0, "call_stub_return");
    } else {
      // Free x87 registers 1..7.
      for (int st = 1; st <= 7; ++st) {
        __ ffree(st);
      }

      // UseSSE <= 1: a double result is left on the x87 top of stack.
      __ movl(rsi, result_type);
      __ cmpl(rsi, T_DOUBLE);
      __ jcc(Assembler::equal, L_skip);
      if (UseSSE == 0) {
        // UseSSE == 0: a float result is left on the x87 top of stack too.
        __ cmpl(rsi, T_FLOAT);
        __ jcc(Assembler::equal, L_skip);
      }
      __ ffree(0);
    }
    __ BIND(L_skip);
  }
#endif // COMPILER2

  // ---- store the result ----
  // Anything that is not T_LONG, T_FLOAT or T_DOUBLE is stored as T_INT.
  Label is_long, is_float, is_double, exit;
  __ movptr(rdi, result);
  __ movl(rsi, result_type);
  __ cmpl(rsi, T_LONG);
  __ jcc(Assembler::equal, is_long);
  __ cmpl(rsi, T_FLOAT);
  __ jcc(Assembler::equal, is_float);
  __ cmpl(rsi, T_DOUBLE);
  __ jcc(Assembler::equal, is_double);

  // T_INT (default) case.
  __ movl(Address(rdi, 0), rax);
  __ BIND(exit);

  // The FPU stack is expected to be empty at this point.
  __ verify_FPU(0, "generate_call_stub");

  // ---- epilogue ----
  // Drop the parameters: rsp now points at the save area.
  __ lea(rsp, rsp_after_call);

  if (sse_save) {
    __ ldmxcsr(mxcsr_save);                           // restore %mxcsr
  }

  // Restore the preserved registers and release the save area.
  __ movptr(rbx, saved_rbx);
  __ movptr(rsi, saved_rsi);
  __ movptr(rdi, saved_rdi);
  __ addptr(rsp, locals_count_in_bytes);

  __ pop(rbp);
  __ ret(0);

  // ---- out-of-line handlers for the non-T_INT result types ----
  __ BIND(is_long);
  __ movl(Address(rdi, 0 * wordSize), rax);
  __ movl(Address(rdi, 1 * wordSize), rdx);
  __ jmp(exit);

  __ BIND(is_float);
  // With SSE the interpreter returns the value in xmm0, otherwise on the x87 stack.
  if (UseSSE >= 1) {
    __ movflt(Address(rdi, 0), xmm0);
  } else {
    __ fstp_s(Address(rdi, 0));
  }
  __ jmp(exit);

  __ BIND(is_double);
  // With SSE2 the interpreter returns the value in xmm0, otherwise on the x87 stack.
  if (UseSSE >= 2) {
    __ movdbl(Address(rdi, 0), xmm0);
  } else {
    __ fstp_d(Address(rdi, 0));
  }
  __ jmp(exit);

  return start;
}
|
314 |
||
315 |
||
316 |
//------------------------------------------------------------------------------------------------------------------------ |
|
317 |
// Return point for a Java call if there's an exception thrown in Java code. |
|
318 |
// The exception is caught and transformed into a pending exception stored in |
|
319 |
// JavaThread that can be tested from within the VM. |
|
320 |
// |
|
321 |
// Note: Usually the parameters are removed by the callee. In case of an exception |
|
322 |
// crossing an activation frame boundary, that is not the case if the callee |
|
323 |
// is compiled code => need to setup the rsp. |
|
324 |
// |
|
325 |
// rax: exception oop |
|
326 |
||
327 |
// Generates the stub that Java code returns to when an exception is thrown:
// the exception oop arriving in rax is recorded as the thread's pending
// exception (together with a file/line marker) and control continues at the
// call stub's return address.
//
//   returns - the start address of the generated stub.
address generate_catch_exception() {
  StubCodeMark mark(this, "StubRoutines", "catch_exception");

  // Frame slots shared with generate_call_stub(); the two must stay in sync.
  const Address rsp_after_call(rbp, -4 * wordSize);
  const Address thread        (rbp,  9 * wordSize);

  address start = __ pc();

  // Fetch the thread pointer straight from the call stub's frame.
  __ movptr(rcx, thread);

#ifdef ASSERT
  // Cross-check it against the thread obtained via get_thread().
  {
    Label L_threads_match;
    __ get_thread(rbx);
    __ cmpptr(rbx, rcx);
    __ jcc(Assembler::equal, L_threads_match);
    __ stop("StubRoutines::catch_exception: threads must correspond");
    __ bind(L_threads_match);
  }
#endif

  // Record the pending exception plus the file and line of this stub generator.
  __ verify_oop(rax);
  __ movptr(Address(rcx, Thread::pending_exception_offset()), rax);
  __ lea(Address(rcx, Thread::exception_file_offset()), ExternalAddress((address)__FILE__));
  __ movl(Address(rcx, Thread::exception_line_offset()), __LINE__);

  // Finish by jumping to the call stub's return point, which must already exist.
  assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
  __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));

  return start;
}
|
357 |
||
358 |
||
359 |
//------------------------------------------------------------------------------------------------------------------------ |
|
360 |
// Continuation point for runtime calls returning with a pending exception. |
|
361 |
// The pending exception check happened in the runtime or native call stub. |
|
362 |
// The pending exception in Thread is converted into a Java-level exception. |
|
363 |
// |
|
364 |
// Contract with Java-level exception handlers: |
|
5046 | 365 |
// rax: exception |
1 | 366 |
// rdx: throwing pc |
367 |
// |
|
368 |
// NOTE: At entry of this stub, exception-pc must be on stack !! |
|
369 |
||
370 |
// Generates the continuation point for runtime calls that return with a
// pending exception. The pending exception is taken out of the Thread, the
// handler for the return address is looked up, and control jumps to it.
//
// On entry the exception pc (the return address) must be on top of the stack.
// On exit to the handler:
//   rax: exception
//   rbx: exception handler
//   rdx: throwing pc
//
//   returns - the start address of the generated stub.
address generate_forward_exception() {
  StubCodeMark mark(this, "StubRoutines", "forward exception");
  address start = __ pc();

  // Register roles used by this stub.
  const Register thread_reg    = rcx;
  const Register exception_oop = rax;
  const Register handler_addr  = rbx;
  const Register exception_pc  = rdx;

  const Address pending_exception(thread_reg, Thread::pending_exception_offset());

  // At this point sp points to the return address into Java (interpreted or
  // compiled) code, which therefore serves as the throwing pc.
  //
  // Arguments pushed before the runtime call are still on the stack, but the
  // exception handler resets the stack pointer, so they are ignored. Any
  // result left in registers is ignored as well.

#ifdef ASSERT
  // This stub is only valid while an exception is pending.
  {
    Label L_has_pending;
    __ get_thread(thread_reg);
    __ cmpptr(pending_exception, (int32_t)NULL_WORD);
    __ jcc(Assembler::notEqual, L_has_pending);
    __ stop("StubRoutines::forward exception: no pending exception (1)");
    __ bind(L_has_pending);
  }
#endif

  // Look up the exception handler for the return address; keep it in rbx.
  __ get_thread(thread_reg);
  __ movptr(exception_pc, Address(rsp, 0));
  BLOCK_COMMENT("call exception_handler_for_return_address");
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread_reg, exception_pc);
  __ mov(handler_addr, rax);

  // Set up rax and rdx, pop the return address, and clear the pending exception.
  __ get_thread(thread_reg);
  __ pop(exception_pc);
  __ movptr(exception_oop, pending_exception);
  __ movptr(pending_exception, NULL_WORD);

#ifdef ASSERT
  // The exception oop just loaded must be non-null.
  {
    Label L_oop_set;
    __ testptr(exception_oop, exception_oop);
    __ jcc(Assembler::notEqual, L_oop_set);
    __ stop("StubRoutines::forward exception: no pending exception (2)");
    __ bind(L_oop_set);
  }
#endif

  // Check that rax really holds a valid exception oop.
  __ verify_oop(exception_oop);

  // Continue at the handler; the return address has already been removed.
  __ jmp(handler_addr);

  return start;
}
|
433 |
||
434 |
||
435 |
//---------------------------------------------------------------------------------------------------- |
|
436 |
// Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest) |
|
437 |
// |
|
438 |
// xchg exists as far back as 8086, lock needed for MP only |
|
439 |
// Stack layout immediately after call: |
|
440 |
// |
|
441 |
// 0 [ret addr ] <--- rsp |
|
442 |
// 1 [ ex ] |
|
443 |
// 2 [ dest ] |
|
444 |
// |
|
445 |
// Result: *dest <- ex, return (old *dest) |
|
446 |
// |
|
447 |
// Note: win32 does not currently use this code |
|
448 |
||
449 |
// Generates the stub behind
//   jint Atomic::xchg(jint exchange_value, volatile jint* dest)
// which stores the exchange value to *dest and returns the old *dest in rax.
//
// rdx is used as a scratch register and is saved/restored around the
// exchange, so both stack arguments sit one word further from rsp than they
// do immediately after the call.
//
//   returns - the start address of the generated stub.
address generate_atomic_xchg() {
  StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
  address start = __ pc();

  // Argument slots as seen after the push of rdx below.
  Address exchange (rsp, 2 * wordSize);
  Address dest_addr(rsp, 3 * wordSize);

  __ push(rdx);
  __ movl(rax, exchange);             // value to store
  __ movptr(rdx, dest_addr);          // destination pointer
  __ xchgl(rax, Address(rdx, 0));     // swap; rax receives the previous *dest
  __ pop(rdx);
  __ ret(0);

  return start;
}
|
464 |
||
465 |
//---------------------------------------------------------------------------------------------------- |
|
466 |
// Support for void verify_mxcsr() |
|
467 |
// |
|
468 |
// This routine is used with -Xcheck:jni to verify that native |
|
469 |
// JNI code does not return to Java code without restoring the |
|
470 |
// MXCSR register to our expected state. |
|
471 |
||
472 |
||
473 |
// Generates the stub behind void verify_mxcsr(), used with -Xcheck:jni to
// verify that native JNI code has not returned with a changed MXCSR register.
//
// When CheckJNICalls is off or SSE is not in use, the stub is a bare return.
// Otherwise it compares the masked MXCSR with the standard value and, on a
// mismatch, emits a warning and reloads the standard value.
//
//   returns - the start address of the generated stub.
address generate_verify_mxcsr() {
  StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
  address start = __ pc();

  const Address mxcsr_save(rsp, 0);

  if (!CheckJNICalls || UseSSE <= 0) {
    // Nothing to verify.
    __ ret(0);
    return start;
  }

  Label L_done;
  ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());

  __ push(rax);
  __ subptr(rsp, wordSize);           // scratch slot for the stored MXCSR
  __ stmxcsr(mxcsr_save);
  __ movl(rax, mxcsr_save);
  __ andl(rax, MXCSR_MASK);
  __ cmp32(rax, mxcsr_std);
  __ jcc(Assembler::equal, L_done);

  // Mismatch: report it and reload the standard value.
  __ warn("MXCSR changed by native JNI code.");
  __ ldmxcsr(mxcsr_std);

  __ bind(L_done);
  __ addptr(rsp, wordSize);           // release the scratch slot
  __ pop(rax);

  __ ret(0);

  return start;
}
|
503 |
||
504 |
||
505 |
//--------------------------------------------------------------------------- |
|
506 |
// Support for void verify_fpu_cntrl_wrd() |
|
507 |
// |
|
508 |
// This routine is used with -Xcheck:jni to verify that native |
|
509 |
// JNI code does not return to Java code without restoring the |
|
510 |
// FP control word to our expected state. |
|
511 |
||
512 |
// Generates the stub behind void verify_fpu_cntrl_wrd(), used with
// -Xcheck:jni to verify that native JNI code has not returned with a changed
// FP control word.
//
// When CheckJNICalls is off the stub is a bare return. Otherwise it compares
// the masked control word with the standard value and, on a mismatch, emits
// a warning and reloads the standard value.
//
//   returns - the start address of the generated stub.
address generate_verify_fpu_cntrl_wrd() {
  StubCodeMark mark(this, "StubRoutines", "verify_spcw");
  address start = __ pc();

  const Address fpu_cntrl_wrd_save(rsp, 0);

  if (!CheckJNICalls) {
    // Nothing to verify.
    __ ret(0);
    return start;
  }

  Label L_done;

  __ push(rax);
  __ subptr(rsp, wordSize);           // scratch slot for the stored control word
  __ fnstcw(fpu_cntrl_wrd_save);
  __ movl(rax, fpu_cntrl_wrd_save);
  __ andl(rax, FPU_CNTRL_WRD_MASK);
  ExternalAddress fpu_std(StubRoutines::addr_fpu_cntrl_wrd_std());
  __ cmp32(rax, fpu_std);
  __ jcc(Assembler::equal, L_done);

  // Mismatch: report it and reload the standard value.
  __ warn("Floating point control word changed by native JNI code.");
  __ fldcw(fpu_std);

  __ bind(L_done);
  __ addptr(rsp, wordSize);           // release the scratch slot
  __ pop(rax);

  __ ret(0);

  return start;
}
|
542 |
||
543 |
//--------------------------------------------------------------------------- |
|
544 |
// Wrapper for slow-case handling of double-to-integer conversion |
|
545 |
// d2i or f2i fast case failed either because it is nan or because |
|
546 |
// of under/overflow. |
|
547 |
// Input: FPU TOS: float value |
|
548 |
// Output: rax, (rdx): integer (long) result |
|
549 |
||
550 |
// Generates the wrapper for slow-case handling of double-to-integer
// conversion, taken when the d2i/f2i fast case failed.
//
//   t       - T_INT or T_LONG; only selects the block comment that is emitted
//   fcn     - address of the C routine that performs the conversion
//   returns - the start address of the generated stub
//
// Input:  value on the FPU top of stack.
// Output: rax, (rdx): integer (long) result.
address generate_d2i_wrapper(BasicType t, address fcn) {
  StubCodeMark mark(this, "StubRoutines", "d2i_wrapper");
  address start = __ pc();

  // Frame layout in words, counted from rsp after push_FPU_state().
  enum layout {
    FPUState_off = 0,
    rbp_off      = FPUStateSizeInWords,
    rdi_off,
    rsi_off,
    rcx_off,
    rbx_off,
    saved_argument_off,
    saved_argument_off2,   // second half of the double
    framesize
  };

  assert(FPUStateSizeInWords == 27, "update stack layout");

  // Spill the incoming double to the stack so it survives push_FPU_state().
  __ subptr(rsp, wordSize * 2);
  __ fstp_d(Address(rsp, 0));

  // Save the CPU registers and the FPU state.
  __ push(rbx);
  __ push(rcx);
  __ push(rsi);
  __ push(rdi);
  __ push(rbp);
  __ push_FPU_state();

  // push_FPU_state() resets the FP top of stack, so reload the original double ...
  __ fld_d(Address(rsp, saved_argument_off * wordSize));
  // ... and place it on the stack as the outgoing argument.
  __ subptr(rsp, wordSize * 2);
  __ fst_d(Address(rsp, 0));

  // Prepare the FPU for doing math in C-land.
  __ empty_FPU_stack();

  // Call the C code to massage the double; the result comes back in EAX.
  switch (t) {
    case T_INT:
      BLOCK_COMMENT("SharedRuntime::d2i");
      break;
    case T_LONG:
      BLOCK_COMMENT("SharedRuntime::d2l");
      break;
    default:
      break;
  }
  __ call_VM_leaf(fcn, 2);

  // Restore the FPU state and the CPU registers, then drop the spilled double.
  __ pop_FPU_state();
  __ pop(rbp);
  __ pop(rdi);
  __ pop(rsi);
  __ pop(rcx);
  __ pop(rbx);
  __ addptr(rsp, wordSize * 2);

  __ ret(0);

  return start;
}
|
609 |
||
610 |
||
611 |
//---------------------------------------------------------------------------------------------------- |
|
612 |
// Non-destructive plausibility checks for oops |
|
613 |
||
614 |
// Generates the stub performing non-destructive plausibility checks on an oop.
//
// Stack as seen by the stub once EFLAGS and rdx have been pushed:
//   [tos    ]: saved rdx
//   [tos + 1]: saved EFLAGS
//   [tos + 2]: return address
//   [tos + 3]: char* error message
//   [tos + 4]: oop object to verify
//   [tos + 5]: saved rax - saved by the caller, clobbered here
//
// A NULL oop is accepted. Otherwise the oop must match the Universe
// verify-oop mask/bits and its klass word must be non-zero; on failure
// MacroAssembler::debug32 is called. Both paths return with
// ret(3 * wordSize), which pops the three argument words.
//
//   returns - the start address of the generated stub.
address generate_verify_oop() {
  StubCodeMark mark(this, "StubRoutines", "verify_oop");
  address start = __ pc();

  Label L_ok, L_broken;

  // Stack slots, valid while both EFLAGS and rdx are pushed.
  const Address oop_arg  (rsp, 4 * wordSize);
  const Address saved_rax(rsp, 5 * wordSize);

  __ pushf();
  __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
  __ push(rdx);                       // rdx is used as scratch below

  // Make sure the object is 'reasonable'; NULL is fine.
  __ movptr(rax, oop_arg);
  __ testptr(rax, rax);
  __ jcc(Assembler::zero, L_ok);

  // Check that the oop lies in the right area of memory.
  const int oop_mask = Universe::verify_oop_mask();
  const int oop_bits = Universe::verify_oop_bits();
  __ mov(rdx, rax);
  __ andptr(rdx, oop_mask);
  __ cmpptr(rdx, oop_bits);
  __ jcc(Assembler::notZero, L_broken);

  // Make sure the klass is 'reasonable', i.e. not zero.
  __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes()));
  __ testptr(rax, rax);
  __ jcc(Assembler::zero, L_broken);

  // Success: restore state and return.
  __ bind(L_ok);
  __ movptr(rax, saved_rax);          // reload the caller's rax
  __ pop(rdx);                        // restore rdx
  __ popf();                          // restore EFLAGS
  __ ret(3 * wordSize);               // pop the arguments

  // Failure: restore state, then hand over to the debugger hook.
  __ bind(L_broken);
  __ movptr(rax, saved_rax);          // reload the caller's rax
  __ pop(rdx);                        // restore rdx
  __ popf();                          // drop the saved EFLAGS -- will be ignored
  __ pusha();                         // push registers (eip = return address & msg are already pushed)
  BLOCK_COMMENT("call MacroAssembler::debug");
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  __ popa();
  __ ret(3 * wordSize);               // pop the arguments

  return start;
}
|
668 |
||
669 |
// |
|
670 |
// Generate pre-barrier for array stores |
|
671 |
// |
|
672 |
// Input: |
|
673 |
// start - starting address |
|
3262
30d1c247fc25
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
2534
diff
changeset
|
674 |
// count - element count |
8498 | 675 |
// Generates the pre-barrier for array stores.
//
//   start                - register holding the starting address
//   count                - register holding the element count
//   uninitialized_target - true when the target is statically known to be
//                          uninitialized; the G1 runtime call is then omitted
//
// Only the G1SATBCTLogging barrier set emits code here. The card-table and
// ModRef kinds emit nothing, and any other kind reaches ShouldNotReachHere().
void gen_write_ref_array_pre_barrier(Register start, Register count, bool uninitialized_target) {
  assert_different_registers(start, count);
  BarrierSet* const barrier_set = Universe::heap()->barrier_set();

  switch (barrier_set->kind()) {
    case BarrierSet::CardTableForRS:
    case BarrierSet::CardTableExtension:
    case BarrierSet::ModRef:
      // No pre-barrier code for these kinds.
      break;

    case BarrierSet::G1SATBCTLogging:
      // With G1, skip the call when the target is statically known to be uninitialized.
      if (uninitialized_target) {
        break;
      }
      __ pusha();                     // preserve all registers across the leaf call
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
                      start, count);
      __ popa();
      break;

    default:
      ShouldNotReachHere();
  }
}
|
697 |
||
698 |
||
699 |
// |
|
700 |
// Generate a post-barrier for an array store |
|
701 |
// |
|
702 |
// start - starting address |
|
703 |
// count - element count |
|
704 |
// |
|
705 |
// The two input registers are overwritten. |
|
706 |
// |
|
707 |
void gen_write_ref_array_post_barrier(Register start, Register count) {
  // Emits the post-barrier for a reference-array store, selected by the
  // kind of the heap's barrier set. Both input registers may be
  // overwritten by the emitted code (card-table case).
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert_different_registers(start, count);

  switch (bs->kind()) {
    case BarrierSet::ModRef:
      // Nothing to emit.
      return;

    case BarrierSet::G1SATBCTLogging:
      // Call the runtime helper with all registers preserved around it.
      __ pusha();                      // push registers
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post),
                      start, count);
      __ popa();
      return;

    case BarrierSet::CardTableForRS:
    case BarrierSet::CardTableExtension: {
      CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
      assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

      // 'end' shares the register of 'count': it first receives the address
      // of the last element (start + count*ptr_size - wordSize) and is then
      // turned back into a count of cards relative to 'start'.
      const Register end = count;
      assert_different_registers(start, end);

      // Pure C++-level setup; nothing is emitted by these declarations.
      intptr_t disp = (intptr_t) ct->byte_map_base;
      Address cardtable(start, count, Address::times_1, disp);
      Label L_loop;

      __ lea(end, Address(start, count, Address::times_ptr, -wordSize));
      __ shrptr(start, CardTableModRefBS::card_shift);   // start --> first card index
      __ shrptr(end,   CardTableModRefBS::card_shift);   // end   --> last card index
      __ subptr(end, start);                             // end   --> count (last - first)
      __ BIND(L_loop);
      // Store a zero byte into byte_map_base[start + count], walking count
      // down to 0 inclusive.
      __ movb(cardtable, 0);
      __ decrement(count);
      __ jcc(Assembler::greaterEqual, L_loop);
      return;
    }

    default:
      // Any other barrier-set kind is unexpected here.
      ShouldNotReachHere();
  }
}
|
749 |
||
1437 | 750 |
|
751 |
// Copy 64 bytes chunks |
|
752 |
// |
|
753 |
// Inputs: |
|
754 |
// from - source array address |
|
755 |
// to_from - destination array address - from |
|
756 |
// qword_count - 8-bytes element count (non-negative; counted down to zero) |
|
757 |
// |
|
758 |
void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
  // Emits a forward copy loop that moves 64 bytes per iteration through
  // XMM registers, followed by an 8-bytes-at-a-time loop for the remainder.
  // The destination is addressed as (from + to_from); 'from' is advanced and
  // 'qword_count' is counted down by the emitted code.
  assert( UseSSE >= 2, "supported cpu only" );
  Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;

  // Staging registers, in the order the copy sequences below consume them.
  const XMMRegister stage[] = { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 };
  const bool use_avx512 = (UseAVX > 2);

  if (use_avx512) {
    // Load 0xffff into mask register k1; rbx is only a temporary here and
    // is saved and restored around the move.
    __ push(rbx);
    __ movl(rbx, 0xffff);
    __ kmovwl(k1, rbx);
    __ pop(rbx);
  }

  // Copy 64-byte chunks: enter the loop at its test.
  __ jmpb(L_copy_64_bytes);
  __ align(OptoLoopAlignment);
  __ BIND(L_copy_64_bytes_loop);

  if (!UseUnalignedLoadStores) {
    // Eight 8-byte moves at offsets 0, 8, ..., 56.
    for (int i = 0; i < 8; i++) {
      __ movq(stage[i], Address(from, i * 8));
      __ movq(Address(from, to_from, Address::times_1, i * 8), stage[i]);
    }
  } else if (use_avx512) {
    // One 512-bit move.
    __ evmovdqul(xmm0, Address(from, 0), Assembler::AVX_512bit);
    __ evmovdqul(Address(from, to_from, Address::times_1, 0), xmm0, Assembler::AVX_512bit);
  } else if (UseAVX == 2) {
    // Two moves at offsets 0 and 32.
    for (int i = 0; i < 2; i++) {
      __ vmovdqu(stage[i], Address(from, i * 32));
      __ vmovdqu(Address(from, to_from, Address::times_1, i * 32), stage[i]);
    }
  } else {
    // Four moves at offsets 0, 16, 32 and 48.
    for (int i = 0; i < 4; i++) {
      __ movdqu(stage[i], Address(from, i * 16));
      __ movdqu(Address(from, to_from, Address::times_1, i * 16), stage[i]);
    }
  }

  __ addl(from, 64);
  __ BIND(L_copy_64_bytes);
  __ subl(qword_count, 8);             // one 64-byte chunk == 8 qwords
  __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);

  if (UseUnalignedLoadStores && (UseAVX == 2)) {
    // clean upper bits of YMM registers
    __ vpxor(xmm0, xmm0);
    __ vpxor(xmm1, xmm1);
  }

  // Undo the last subtraction; what is left (0..7) is the qword remainder.
  __ addl(qword_count, 8);
  __ jccb(Assembler::zero, L_exit);

  // Remainder: fewer than 8 qwords are left, copy them one at a time.
  __ BIND(L_copy_8_bytes);
  __ movq(xmm0, Address(from, 0));
  __ movq(Address(from, to_from, Address::times_1), xmm0);
  __ addl(from, 8);
  __ decrement(qword_count);
  __ jcc(Assembler::greater, L_copy_8_bytes);
  __ BIND(L_exit);
}
|
833 |
||
1 | 834 |
// Copy 64 bytes chunks |
835 |
// |
|
836 |
// Inputs: |
|
837 |
// from - source array address |
|
838 |
// to_from - destination array address - from |
|
839 |
// qword_count - 8-bytes element count (non-negative; counted down to zero) |
|
840 |
// |
|
841 |
void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
  // Emits a forward copy loop that moves 64 bytes per iteration through the
  // eight MMX registers, followed by an 8-bytes-at-a-time loop for the
  // remainder. The destination is addressed as (from + to_from); 'from' is
  // advanced and 'qword_count' is counted down by the emitted code.
  assert( VM_Version::supports_mmx(), "supported cpu only" );
  Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;

  const MMXRegister stage[] = { mmx0, mmx1, mmx2, mmx3, mmx4, mmx5, mmx6, mmx7 };
  const int chunk_qwords = 8;   // qwords moved per loop iteration
  const int load_lead    = 3;   // loads run this many qwords ahead of the stores

  // Copy 64-byte chunks: enter the loop at its test.
  __ jmpb(L_copy_64_bytes);
  __ align(OptoLoopAlignment);
  __ BIND(L_copy_64_bytes_loop);
  // Interleaved schedule: load 0, load 1, load 2, then alternately
  // store k / load k+3, and finally drain stores 5, 6 and 7.
  for (int step = 0; step < chunk_qwords + load_lead; step++) {
    const int st = step - load_lead;
    if (st >= 0) {
      __ movq(Address(from, to_from, Address::times_1, st * 8), stage[st]);
    }
    if (step < chunk_qwords) {
      __ movq(stage[step], Address(from, step * 8));
    }
  }
  __ addptr(from, 64);
  __ BIND(L_copy_64_bytes);
  __ subl(qword_count, 8);             // one 64-byte chunk == 8 qwords
  __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);

  // Undo the last subtraction; what is left (0..7) is the qword remainder.
  __ addl(qword_count, 8);
  __ jccb(Assembler::zero, L_exit);

  // Remainder: fewer than 8 qwords are left, copy them one at a time.
  __ BIND(L_copy_8_bytes);
  __ movq(mmx0, Address(from, 0));
  __ movq(Address(from, to_from, Address::times_1), mmx0);
  __ addptr(from, 8);
  __ decrement(qword_count);
  __ jcc(Assembler::greater, L_copy_8_bytes);
  __ BIND(L_exit);
  __ emms();                           // emitted on every path once the MMX copying is done
}
|
882 |
||
883 |
// Generates a stub that copies 'count' elements of type 't' in ascending
// address order (the forward / "disjoint" copy).
//
//   t                  - element type; T_BYTE, T_SHORT and T_OBJECT get special handling
//   aligned            - when true, the source pre-alignment for byte/short arrays is skipped
//   sf                 - scale factor matching the element size
//   entry              - if non-NULL, receives the pc just after argument loading
//   name               - stub name handed to StubCodeMark
//   dest_uninitialized - forwarded to the pre-barrier for T_OBJECT copies
//
// The emitted code loads from, to and count from the stack, returns 0 in
// rax, and preserves rsi and rdi. Returns the stub's start address.
address generate_disjoint_copy(BasicType t, bool aligned,
                               Address::ScaleFactor sf,
                               address* entry, const char *name,
                               bool dest_uninitialized = false) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
  Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes;

  // (1 << shift) elements make up 4 bytes, (2 << shift) elements 8 bytes.
  const int  shift        = Address::times_ptr - sf;
  const bool is_oop       = (t == T_OBJECT);
  const bool is_byte      = (t == T_BYTE);
  const bool is_sub_dword = is_byte || (t == T_SHORT);

  const Register from     = rsi;  // source array address
  const Register to       = rdi;  // destination array address
  const Register count    = rcx;  // elements count
  const Register to_from  = to;   // (to - from); shares rdi with 'to'
  const Register saved_to = rdx;  // saved destination array address

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);
  // Arguments are read relative to rsp after the frame setup above.
  __ movptr(from , Address(rsp, 12+ 4));
  __ movptr(to   , Address(rsp, 12+ 8));
  __ movl(count, Address(rsp, 12+ 12));

  if (entry != NULL) {
    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  if (is_oop) {
    // Empty oop copies skip both barriers and the copy itself.
    __ testl(count, count);
    __ jcc(Assembler::zero, L_0_count);
    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
    __ mov(saved_to, to);          // save 'to' for the post-barrier
  }

  __ subptr(to, from); // to --> to_from
  __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
  __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp

  if (!UseUnalignedLoadStores && !aligned && is_sub_dword) {
    // Align the source address at a 4-byte boundary.
    if (is_byte) {
      // One byte misalignment happens only for byte arrays
      __ testl(from, 1);
      __ jccb(Assembler::zero, L_skip_align1);
      __ movb(rax, Address(from, 0));
      __ movb(Address(from, to_from, Address::times_1, 0), rax);
      __ increment(from);
      __ decrement(count);
      __ BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    __ testl(from, 2);
    __ jccb(Assembler::zero, L_skip_align2);
    __ movw(rax, Address(from, 0));
    __ movw(Address(from, to_from, Address::times_1, 0), rax);
    __ addptr(from, 2);
    __ subl(count, 1<<(shift-1));   // elements in 2 bytes
    __ BIND(L_skip_align2);
  }

  if (VM_Version::supports_mmx()) {
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      __ testptr(from, 4);
      __ jccb(Assembler::zero, L_copy_64_bytes);
      __ movl(rax, Address(from, 0));
      __ movl(Address(from, to_from, Address::times_1, 0), rax);
      __ addptr(from, 4);
      __ subl(count, 1<<shift);     // elements in 4 bytes
    }
    __ BIND(L_copy_64_bytes);
    __ mov(rax, count);
    __ shrl(rax, shift+1);  // 8 bytes chunk count
    // Bulk copy of the 8-byte chunks; rax is consumed as the chunk counter.
    if (UseXMMForArrayCopy) {
      xmm_copy_forward(from, to_from, rax);
    } else {
      mmx_copy_forward(from, to_from, rax);
    }
  } else {
    // No MMX: bulk copy with rep_mov, which needs the real 'to' in rdi.
    __ mov(rax, count);      // save 'count'
    __ shrl(count, shift);   // count of 4-byte units
    __ addptr(to_from, from);// restore 'to'
    __ rep_mov();
    __ subptr(to_from, from);// restore 'to_from'
    __ mov(count, rax);      // restore 'count'
    __ jmpb(L_copy_2_bytes); // all dwords were copied
  }

  // copy trailing dword
  __ BIND(L_copy_4_bytes);
  __ testl(count, 1<<shift);
  __ jccb(Assembler::zero, L_copy_2_bytes);
  __ movl(rax, Address(from, 0));
  __ movl(Address(from, to_from, Address::times_1, 0), rax);

  if (!is_sub_dword) {
    // Elements are at least 4 bytes wide: nothing smaller can remain.
    __ BIND(L_copy_2_bytes);
  } else {
    __ addptr(from, 4);
    __ BIND(L_copy_2_bytes);
    // copy trailing word
    __ testl(count, 1<<(shift-1));
    __ jccb(Assembler::zero, L_copy_byte);
    __ movw(rax, Address(from, 0));
    __ movw(Address(from, to_from, Address::times_1, 0), rax);
    if (!is_byte) {
      __ BIND(L_copy_byte);
    } else {
      __ addptr(from, 2);
      __ BIND(L_copy_byte);
      // copy trailing byte
      __ testl(count, 1);
      __ jccb(Assembler::zero, L_exit);
      __ movb(rax, Address(from, 0));
      __ movb(Address(from, to_from, Address::times_1, 0), rax);
      __ BIND(L_exit);
    }
  }

  if (is_oop) {
    // count and to were modified by the copy; recover them for the post-barrier.
    __ movl(count, Address(rsp, 12+12)); // reread 'count'
    __ mov(to, saved_to); // restore 'to'
    gen_write_ref_array_post_barrier(to, count);
    __ BIND(L_0_count);
  }
  inc_copy_counter_np(t);
  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1019 |
||
1020 |
||
6433 | 1021 |
// Generates a fill stub: a thin frame wrapper that loads the three
// arguments from the stack and delegates the actual fill sequence to
// MacroAssembler::generate_fill.
//
//   t       - element type, forwarded to the macro assembler
//   aligned - forwarded to the macro assembler
//   name    - stub name handed to StubCodeMark
//
// Returns the stub's start address.
address generate_fill(BasicType t, bool aligned, const char *name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  BLOCK_COMMENT("Entry:");

  const Register to    = rdi;  // destination array address
  const Register value = rdx;  // value
  const Register count = rsi;  // elements count

  // Stack slots of the incoming arguments, relative to rsp once the frame
  // below has been set up.
  const Address to_arg   (rsp, 12+ 4);
  const Address value_arg(rsp, 12+ 8);
  const Address count_arg(rsp, 12+ 12);

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);
  __ movptr(to   , to_arg);
  __ movl(value, value_arg);
  __ movl(count, count_arg);

  // rax and xmm0 are handed over as additional registers.
  __ generate_fill(t, aligned, to, value, count, rax, xmm0);

  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);
  return start;
}
|
1047 |
||
1 | 1048 |
// Generates a stub that copies 'count' elements of type 't' between arrays
// that may overlap. When the destination does not start inside the source
// range the stub jumps to 'nooverlap_target'; otherwise it copies from
// high addresses to low.
//
//   t                  - element type; T_BYTE, T_SHORT and T_OBJECT get special handling
//   aligned            - not referenced by this generator
//   sf                 - scale factor matching the element size
//   nooverlap_target   - entry of the forward copy; expects its arguments in rsi and rdi
//   entry              - if non-NULL, receives the pc just after argument loading
//   name               - stub name handed to StubCodeMark
//   dest_uninitialized - forwarded to the pre-barrier for T_OBJECT copies
//
// The emitted code returns 0 in rax and preserves rsi and rdi.
// Returns the stub's start address.
address generate_conjoint_copy(BasicType t, bool aligned,
                               Address::ScaleFactor sf,
                               address nooverlap_target,
                               address* entry, const char *name,
                               bool dest_uninitialized = false) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
  Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop;

  // (1 << shift) elements make up 4 bytes, (2 << shift) elements 8 bytes.
  const int  shift        = Address::times_ptr - sf;
  const bool is_oop       = (t == T_OBJECT);
  const bool is_byte      = (t == T_BYTE);
  const bool is_sub_dword = is_byte || (t == T_SHORT);

  const Register src   = rax;  // source array address
  const Register dst   = rdx;  // destination array address
  const Register from  = rsi;  // source array address
  const Register to    = rdi;  // destination array address
  const Register count = rcx;  // elements count
  const Register end   = rax;  // array end address (shares rax with 'src')

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi);
  __ push(rdi);
  __ movptr(src  , Address(rsp, 12+ 4));   // from
  __ movptr(dst  , Address(rsp, 12+ 8));   // to
  __ movl2ptr(count, Address(rsp, 12+12)); // count

  if (entry != NULL) {
    *entry = __ pc(); // Entry point from generic arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  // nooverlap_target expects arguments in rsi and rdi.
  __ mov(from, src);
  __ mov(to  , dst);

  // arrays overlap test: dispatch to disjoint stub if necessary.
  RuntimeAddress nooverlap(nooverlap_target);
  __ cmpptr(dst, src);
  __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
  __ jump_cc(Assembler::belowEqual, nooverlap);   // dst <= src
  __ cmpptr(dst, end);
  __ jump_cc(Assembler::aboveEqual, nooverlap);   // dst >= end of source

  if (is_oop) {
    // Empty oop copies skip both barriers and the copy itself.
    __ testl(count, count);
    __ jcc(Assembler::zero, L_0_count);
    gen_write_ref_array_pre_barrier(dst, count, dest_uninitialized);
  }

  // copy from high to low
  __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
  __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
  if (is_sub_dword) {
    // Align the end of destination array at 4 bytes address boundary.
    // From here on rdx is only a scratch register for the data being moved.
    __ lea(end, Address(dst, count, sf, 0));
    if (is_byte) {
      // One byte misalignment happens only for byte arrays
      __ testl(end, 1);
      __ jccb(Assembler::zero, L_skip_align1);
      __ decrement(count);
      __ movb(rdx, Address(from, count, sf, 0));
      __ movb(Address(to, count, sf, 0), rdx);
      __ BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    __ testl(end, 2);
    __ jccb(Assembler::zero, L_skip_align2);
    __ subptr(count, 1<<(shift-1));   // elements in 2 bytes
    __ movw(rdx, Address(from, count, sf, 0));
    __ movw(Address(to, count, sf, 0), rdx);
    __ BIND(L_skip_align2);
    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
    __ jcc(Assembler::below, L_copy_4_bytes);
  }

  if (VM_Version::supports_mmx()) {
    // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
    __ testptr(end, 4);
    __ jccb(Assembler::zero, L_copy_8_bytes);
    __ subl(count, 1<<shift);         // elements in 4 bytes
    __ movl(rdx, Address(from, count, sf, 0));
    __ movl(Address(to, count, sf, 0), rdx);
    __ jmpb(L_copy_8_bytes);

    __ align(OptoLoopAlignment);
    // Move 8 bytes per iteration; the loop is entered at its test.
    __ BIND(L_copy_8_bytes_loop);
    if (UseXMMForArrayCopy) {
      __ movq(xmm0, Address(from, count, sf, 0));
      __ movq(Address(to, count, sf, 0), xmm0);
    } else {
      __ movq(mmx0, Address(from, count, sf, 0));
      __ movq(Address(to, count, sf, 0), mmx0);
    }
    __ BIND(L_copy_8_bytes);
    __ subl(count, 2<<shift);
    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
    __ addl(count, 2<<shift);         // undo the last subtraction
    if (!UseXMMForArrayCopy) {
      __ emms();
    }
  } else {
    // No MMX: bulk copy with rep_mov, bracketed by std()/cld().
    __ std();
    __ mov(rax, count); // Save 'count'
    __ mov(rdx, to);    // Save 'to'
    __ lea(rsi, Address(from, count, sf, -4));
    __ lea(rdi, Address(to  , count, sf, -4));
    __ shrptr(count, shift); // count of 4-byte units
    __ rep_mov();
    __ cld();
    __ mov(count, rax); // restore 'count'
    __ andl(count, (1<<shift)-1);      // mask the number of rest elements
    __ movptr(from, Address(rsp, 12+4)); // reread 'from'
    __ mov(to, rdx);   // restore 'to'
    __ jmpb(L_copy_2_bytes); // all dword were copied
  }

  __ BIND(L_copy_4_bytes);
  // copy prefix dword (4 bytes)
  __ testl(count, 1<<shift);
  __ jccb(Assembler::zero, L_copy_2_bytes);
  __ movl(rdx, Address(from, count, sf, -4));
  __ movl(Address(to, count, sf, -4), rdx);

  if (!is_sub_dword) {
    // Elements are at least 4 bytes wide: nothing smaller can remain.
    __ BIND(L_copy_2_bytes);
  } else {
    __ subl(count, (1<<shift));
    __ BIND(L_copy_2_bytes);
    // copy prefix word (2 bytes)
    __ testl(count, 1<<(shift-1));
    __ jccb(Assembler::zero, L_copy_byte);
    __ movw(rdx, Address(from, count, sf, -2));
    __ movw(Address(to, count, sf, -2), rdx);
    if (!is_byte) {
      __ BIND(L_copy_byte);
    } else {
      __ subl(count, 1<<(shift-1));
      __ BIND(L_copy_byte);
      // copy prefix byte
      __ testl(count, 1);
      __ jccb(Assembler::zero, L_exit);
      __ movb(rdx, Address(from, 0));
      __ movb(Address(to, 0), rdx);
      __ BIND(L_exit);
    }
  }

  if (is_oop) {
    __ movl2ptr(count, Address(rsp, 12+12)); // reread count
    gen_write_ref_array_post_barrier(to, count);
    __ BIND(L_0_count);
  }
  inc_copy_counter_np(t);
  __ pop(rdi);
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1209 |
||
1210 |
||
1211 |
// Generates a stub that copies 'count' 8-byte elements in ascending
// address order (the forward / "disjoint" long copy).
//
//   entry - if non-NULL, receives the pc just after argument loading
//           (entry point used by the conjoint stub). A NULL entry is
//           tolerated, matching generate_disjoint_copy.
//   name  - stub name handed to StubCodeMark
//
// The emitted code loads from, to and count from the stack and returns 0
// in rax. Returns the stub's start address.
address generate_disjoint_long_copy(address* entry, const char *name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_copy_8_bytes, L_copy_8_bytes_loop;
  const Register from       = rax;  // source array address
  const Register to         = rdx;  // destination array address
  const Register count      = rcx;  // elements count
  const Register to_from    = rdx;  // (to - from); shares rdx with 'to'

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ movptr(from , Address(rsp, 8+0));       // from
  __ movptr(to   , Address(rsp, 8+4));       // to
  __ movl2ptr(count, Address(rsp, 8+8));     // count

  // Only publish the secondary entry point when the caller asked for it.
  if (entry != NULL) {
    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  __ subptr(to, from); // to --> to_from
  if (VM_Version::supports_mmx()) {
    // Bulk copy through XMM or MMX registers; 'count' is consumed as the
    // qword counter.
    if (UseXMMForArrayCopy) {
      xmm_copy_forward(from, to_from, count);
    } else {
      mmx_copy_forward(from, to_from, count);
    }
  } else {
    // No MMX: move each 8-byte element with fild_d/fistp_d. The loop is
    // entered at its test.
    __ jmpb(L_copy_8_bytes);
    __ align(OptoLoopAlignment);
    __ BIND(L_copy_8_bytes_loop);
    __ fild_d(Address(from, 0));
    __ fistp_d(Address(from, to_from, Address::times_1));
    __ addptr(from, 8);
    __ BIND(L_copy_8_bytes);
    __ decrement(count);
    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
  }
  inc_copy_counter_np(T_LONG);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1254 |
||
1255 |
// Generates a stub that copies 'count' 8-byte elements between arrays that
// may overlap. When the destination does not start inside the source range
// the stub jumps to 'nooverlap_target'; otherwise it copies from the last
// element down to the first.
//
//   nooverlap_target - entry of the forward long copy
//   entry            - if non-NULL, receives the pc just after argument
//                      loading (entry point used by the generic arraycopy
//                      stub). A NULL entry is tolerated, matching
//                      generate_conjoint_copy.
//   name             - stub name handed to StubCodeMark
//
// The emitted code returns 0 in rax. Returns the stub's start address.
address generate_conjoint_long_copy(address nooverlap_target,
                                    address* entry, const char *name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Label L_copy_8_bytes, L_copy_8_bytes_loop;
  const Register from       = rax;  // source array address
  const Register to         = rdx;  // destination array address
  const Register count      = rcx;  // elements count
  const Register end_from   = rax;  // source array end address (shares rax with 'from')

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ movptr(from , Address(rsp, 8+0));       // from
  __ movptr(to   , Address(rsp, 8+4));       // to
  __ movl2ptr(count, Address(rsp, 8+8));     // count

  // Only publish the secondary entry point when the caller asked for it.
  if (entry != NULL) {
    *entry = __ pc(); // Entry point from generic arraycopy stub.
    BLOCK_COMMENT("Entry:");
  }

  // arrays overlap test
  __ cmpptr(to, from);
  RuntimeAddress nooverlap(nooverlap_target);
  __ jump_cc(Assembler::belowEqual, nooverlap);   // to <= from
  __ lea(end_from, Address(from, count, Address::times_8, 0));
  __ cmpptr(to, end_from);
  // The lea above overwrote 'from' (same register), so it is reloaded from
  // the stack between the compare and the branch that consumes its result.
  __ movptr(from, Address(rsp, 8));  // from
  __ jump_cc(Assembler::aboveEqual, nooverlap);   // to >= end of source

  // Backward copy; the loop is entered at its test, which decrements
  // 'count' before each element is moved.
  __ jmpb(L_copy_8_bytes);

  __ align(OptoLoopAlignment);
  __ BIND(L_copy_8_bytes_loop);
  if (VM_Version::supports_mmx()) {
    if (UseXMMForArrayCopy) {
      __ movq(xmm0, Address(from, count, Address::times_8));
      __ movq(Address(to, count, Address::times_8), xmm0);
    } else {
      __ movq(mmx0, Address(from, count, Address::times_8));
      __ movq(Address(to, count, Address::times_8), mmx0);
    }
  } else {
    // No MMX: move the 8-byte element with fild_d/fistp_d.
    __ fild_d(Address(from, count, Address::times_8));
    __ fistp_d(Address(to, count, Address::times_8));
  }
  __ BIND(L_copy_8_bytes);
  __ decrement(count);
  __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);

  // emms is only emitted when the loop above used an MMX register.
  if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
    __ emms();
  }
  inc_copy_counter_np(T_LONG);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ xorptr(rax, rax); // return 0
  __ ret(0);
  return start;
}
|
1313 |
||
1314 |
||
1315 |
// Helper for generating a dynamic type check. |
|
1316 |
// The sub_klass must be one of {rbx, rdx, rsi}. |
|
1317 |
// The temp is killed. |
|
1318 |
void generate_type_check(Register sub_klass, |
|
1319 |
Address& super_check_offset_addr, |
|
1320 |
Address& super_klass_addr, |
|
1321 |
Register temp, |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1322 |
Label* L_success, Label* L_failure) { |
1 | 1323 |
BLOCK_COMMENT("type_check:"); |
1324 |
||
1325 |
Label L_fallthrough; |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1326 |
#define LOCAL_JCC(assembler_con, label_ptr) \ |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1327 |
if (label_ptr != NULL) __ jcc(assembler_con, *(label_ptr)); \ |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1328 |
else __ jcc(assembler_con, L_fallthrough) /*omit semi*/ |
1 | 1329 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1330 |
// The following is a strange variation of the fast path which requires |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1331 |
// one less register, because needed values are on the argument stack. |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1332 |
// __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp, |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1333 |
// L_success, L_failure, NULL); |
1 | 1334 |
assert_different_registers(sub_klass, temp); |
1335 |
||
11430
718fc06da49a
7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
stefank
parents:
10565
diff
changeset
|
1336 |
int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); |
1 | 1337 |
|
1338 |
// if the pointers are equal, we are done (e.g., String[] elements) |
|
1066 | 1339 |
__ cmpptr(sub_klass, super_klass_addr); |
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1340 |
LOCAL_JCC(Assembler::equal, L_success); |
1 | 1341 |
|
1342 |
// check the supertype display: |
|
1066 | 1343 |
__ movl2ptr(temp, super_check_offset_addr); |
1 | 1344 |
Address super_check_addr(sub_klass, temp, Address::times_1, 0); |
1066 | 1345 |
__ movptr(temp, super_check_addr); // load displayed supertype |
1346 |
__ cmpptr(temp, super_klass_addr); // test the super type |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1347 |
LOCAL_JCC(Assembler::equal, L_success); |
1 | 1348 |
|
1349 |
// if it was a primary super, we can just fail immediately |
|
1350 |
__ cmpl(super_check_offset_addr, sc_offset); |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1351 |
LOCAL_JCC(Assembler::notEqual, L_failure); |
1 | 1352 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1353 |
// The repne_scan instruction uses fixed registers, which will get spilled. |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1354 |
// We happen to know this works best when super_klass is in rax. |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1355 |
Register super_klass = temp; |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1356 |
__ movptr(super_klass, super_klass_addr); |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1357 |
__ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1358 |
L_success, L_failure); |
1 | 1359 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1360 |
__ bind(L_fallthrough); |
1 | 1361 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1362 |
if (L_success == NULL) { BLOCK_COMMENT("L_success:"); } |
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1363 |
if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); } |
1 | 1364 |
|
2256
82d4e10b7c6b
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
1909
diff
changeset
|
1365 |
#undef LOCAL_JCC |
1 | 1366 |
} |
1367 |
||
1368 |
// |
|
1369 |
// Generate checkcasting array copy stub |
|
1370 |
// |
|
1371 |
// Input: |
|
1372 |
// 4(rsp) - source array address |
|
1373 |
// 8(rsp) - destination array address |
|
1374 |
// 12(rsp) - element count, can be zero |
|
1375 |
// 16(rsp) - size_t ckoff (super_check_offset) |
|
1376 |
// 20(rsp) - oop ckval (super_klass) |
|
1377 |
// |
|
1378 |
// Output: |
|
1379 |
// rax, == 0 - success |
|
1380 |
// rax, == -1^K - failure, where K is partial transfer count |
|
1381 |
// |
|
8498 | 1382 |
address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) { |
1 | 1383 |
__ align(CodeEntryAlignment); |
1384 |
StubCodeMark mark(this, "StubRoutines", name); |
|
1385 |
address start = __ pc(); |
|
1386 |
||
1387 |
Label L_load_element, L_store_element, L_do_card_marks, L_done; |
|
1388 |
||
1389 |
// register use: |
|
1390 |
// rax, rdx, rcx -- loop control (end_from, end_to, count) |
|
1391 |
// rdi, rsi -- element access (oop, klass) |
|
1392 |
// rbx, -- temp |
|
1393 |
const Register from = rax; // source array address |
|
1394 |
const Register to = rdx; // destination array address |
|
1395 |
const Register length = rcx; // elements count |
|
1396 |
const Register elem = rdi; // each oop copied |
|
1397 |
const Register elem_klass = rsi; // each elem._klass (sub_klass) |
|
1398 |
const Register temp = rbx; // lone remaining temp |
|
1399 |
||
1400 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
1401 |
||
1066 | 1402 |
__ push(rsi); |
1403 |
__ push(rdi); |
|
1404 |
__ push(rbx); |
|
1 | 1405 |
|
1406 |
Address from_arg(rsp, 16+ 4); // from |
|
1407 |
Address to_arg(rsp, 16+ 8); // to |
|
1408 |
Address length_arg(rsp, 16+12); // elements count |
|
1409 |
Address ckoff_arg(rsp, 16+16); // super_check_offset |
|
1410 |
Address ckval_arg(rsp, 16+20); // super_klass |
|
1411 |
||
1412 |
// Load up: |
|
1066 | 1413 |
__ movptr(from, from_arg); |
1414 |
__ movptr(to, to_arg); |
|
1415 |
__ movl2ptr(length, length_arg); |
|
1 | 1416 |
|
8487
bf96596f06d2
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
8315
diff
changeset
|
1417 |
if (entry != NULL) { |
bf96596f06d2
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
8315
diff
changeset
|
1418 |
*entry = __ pc(); // Entry point from generic arraycopy stub. |
bf96596f06d2
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
8315
diff
changeset
|
1419 |
BLOCK_COMMENT("Entry:"); |
bf96596f06d2
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
8315
diff
changeset
|
1420 |
} |
1 | 1421 |
|
1422 |
//--------------------------------------------------------------- |
|
1423 |
// Assembler stub will be used for this call to arraycopy |
|
1424 |
// if the two arrays are subtypes of Object[] but the |
|
1425 |
// destination array type is not equal to or a supertype |
|
1426 |
// of the source type. Each element must be separately |
|
1427 |
// checked. |
|
1428 |
||
1429 |
// Loop-invariant addresses. They are exclusive end pointers. |
|
1066 | 1430 |
Address end_from_addr(from, length, Address::times_ptr, 0); |
1431 |
Address end_to_addr(to, length, Address::times_ptr, 0); |
|
1 | 1432 |
|
1433 |
Register end_from = from; // re-use |
|
1434 |
Register end_to = to; // re-use |
|
1435 |
Register count = length; // re-use |
|
1436 |
||
1437 |
// Loop-variant addresses. They assume post-incremented count < 0. |
|
1066 | 1438 |
Address from_element_addr(end_from, count, Address::times_ptr, 0); |
1439 |
Address to_element_addr(end_to, count, Address::times_ptr, 0); |
|
1 | 1440 |
Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes()); |
1441 |
||
1442 |
// Copy from low to high addresses, indexed from the end of each array. |
|
8498 | 1443 |
gen_write_ref_array_pre_barrier(to, count, dest_uninitialized); |
1066 | 1444 |
__ lea(end_from, end_from_addr); |
1445 |
__ lea(end_to, end_to_addr); |
|
1 | 1446 |
assert(length == count, ""); // else fix next line: |
1066 | 1447 |
__ negptr(count); // negate and test the length |
1 | 1448 |
__ jccb(Assembler::notZero, L_load_element); |
1449 |
||
1450 |
// Empty array: Nothing to do. |
|
1066 | 1451 |
__ xorptr(rax, rax); // return 0 on (trivial) success |
1 | 1452 |
__ jmp(L_done); |
1453 |
||
1454 |
// ======== begin loop ======== |
|
1455 |
// (Loop is rotated; its entry is L_load_element.) |
|
1456 |
// Loop control: |
|
1457 |
// for (count = -count; count != 0; count++) |
|
1458 |
// Base pointers src, dst are biased by 8*count,to last element. |
|
5249 | 1459 |
__ align(OptoLoopAlignment); |
1 | 1460 |
|
1461 |
__ BIND(L_store_element); |
|
1066 | 1462 |
__ movptr(to_element_addr, elem); // store the oop |
1 | 1463 |
__ increment(count); // increment the count toward zero |
1464 |
__ jccb(Assembler::zero, L_do_card_marks); |
|
1465 |
||
1466 |
// ======== loop entry is here ======== |
|
1467 |
__ BIND(L_load_element); |
|
1066 | 1468 |
__ movptr(elem, from_element_addr); // load the oop |
1469 |
__ testptr(elem, elem); |
|
1 | 1470 |
__ jccb(Assembler::zero, L_store_element); |
1471 |
||
1472 |
// (Could do a trick here: Remember last successful non-null |
|
1473 |
// element stored and make a quick oop equality check on it.) |
|
1474 |
||
1066 | 1475 |
__ movptr(elem_klass, elem_klass_addr); // query the object klass |
1 | 1476 |
generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp, |
1477 |
&L_store_element, NULL); |
|
17622
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1478 |
// (On fall-through, we have failed the element type check.) |
1 | 1479 |
// ======== end loop ======== |
1480 |
||
1481 |
// It was a real error; we must depend on the caller to finish the job. |
|
192
d66300cdf939
6614036: REGRESSION: Java server x86 VM intermittently crash with SIGSEGV (0xb)
rasbold
parents:
189
diff
changeset
|
1482 |
// Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops. |
d66300cdf939
6614036: REGRESSION: Java server x86 VM intermittently crash with SIGSEGV (0xb)
rasbold
parents:
189
diff
changeset
|
1483 |
// Emit GC store barriers for the oops we have copied (length_arg + count), |
1 | 1484 |
// and report their number to the caller. |
17622
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1485 |
assert_different_registers(to, count, rax); |
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1486 |
Label L_post_barrier; |
1 | 1487 |
__ addl(count, length_arg); // transfers = (length - remaining) |
1066 | 1488 |
__ movl2ptr(rax, count); // save the value |
17622
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1489 |
__ notptr(rax); // report (-1^K) to caller (does not affect flags) |
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1490 |
__ jccb(Assembler::notZero, L_post_barrier); |
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1491 |
__ jmp(L_done); // K == 0, nothing was copied, skip post barrier |
1 | 1492 |
|
1493 |
// Come here on success only. |
|
1494 |
__ BIND(L_do_card_marks); |
|
17622
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1495 |
__ xorptr(rax, rax); // return 0 on success |
1066 | 1496 |
__ movl2ptr(count, length_arg); |
17622
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1497 |
|
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1498 |
__ BIND(L_post_barrier); |
4037daf22a17
8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
kvn
parents:
16624
diff
changeset
|
1499 |
__ movptr(to, to_arg); // reload |
1 | 1500 |
gen_write_ref_array_post_barrier(to, count); |
1501 |
||
1502 |
// Common exit point (success or failure). |
|
1503 |
__ BIND(L_done); |
|
1066 | 1504 |
__ pop(rbx); |
1505 |
__ pop(rdi); |
|
1506 |
__ pop(rsi); |
|
1 | 1507 |
inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); |
1508 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
|
1509 |
__ ret(0); |
|
1510 |
||
1511 |
return start; |
|
1512 |
} |
|
1513 |
||
1514 |
// |
|
1515 |
// Generate 'unsafe' array copy stub |
|
1516 |
// Though just as safe as the other stubs, it takes an unscaled |
|
1517 |
// size_t argument instead of an element count. |
|
1518 |
// |
|
1519 |
// Input: |
|
1520 |
// 4(rsp) - source array address |
|
1521 |
// 8(rsp) - destination array address |
|
1522 |
// 12(rsp) - byte count, can be zero |
|
1523 |
// |
|
1524 |
// Output: |
|
1525 |
// rax, == 0 - success |
|
1526 |
// rax, == -1 - need to call System.arraycopy |
|
1527 |
// |
|
1528 |
// Examines the alignment of the operands and dispatches |
|
1529 |
// to a long, int, short, or byte copy loop. |
|
1530 |
// |
|
1531 |
address generate_unsafe_copy(const char *name, |
|
1532 |
address byte_copy_entry, |
|
1533 |
address short_copy_entry, |
|
1534 |
address int_copy_entry, |
|
1535 |
address long_copy_entry) { |
|
1536 |
||
1537 |
Label L_long_aligned, L_int_aligned, L_short_aligned; |
|
1538 |
||
1539 |
__ align(CodeEntryAlignment); |
|
1540 |
StubCodeMark mark(this, "StubRoutines", name); |
|
1541 |
address start = __ pc(); |
|
1542 |
||
1543 |
const Register from = rax; // source array address |
|
1544 |
const Register to = rdx; // destination array address |
|
1545 |
const Register count = rcx; // elements count |
|
1546 |
||
1547 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
1066 | 1548 |
__ push(rsi); |
1549 |
__ push(rdi); |
|
1 | 1550 |
Address from_arg(rsp, 12+ 4); // from |
1551 |
Address to_arg(rsp, 12+ 8); // to |
|
1552 |
Address count_arg(rsp, 12+12); // byte count |
|
1553 |
||
1554 |
// Load up: |
|
1066 | 1555 |
__ movptr(from , from_arg); |
1556 |
__ movptr(to , to_arg); |
|
1557 |
__ movl2ptr(count, count_arg); |
|
1 | 1558 |
|
1559 |
// bump this on entry, not on exit: |
|
1560 |
inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); |
|
1561 |
||
1562 |
const Register bits = rsi; |
|
1066 | 1563 |
__ mov(bits, from); |
1564 |
__ orptr(bits, to); |
|
1565 |
__ orptr(bits, count); |
|
1 | 1566 |
|
1567 |
__ testl(bits, BytesPerLong-1); |
|
1568 |
__ jccb(Assembler::zero, L_long_aligned); |
|
1569 |
||
1570 |
__ testl(bits, BytesPerInt-1); |
|
1571 |
__ jccb(Assembler::zero, L_int_aligned); |
|
1572 |
||
1573 |
__ testl(bits, BytesPerShort-1); |
|
1574 |
__ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); |
|
1575 |
||
1576 |
__ BIND(L_short_aligned); |
|
1066 | 1577 |
__ shrptr(count, LogBytesPerShort); // size => short_count |
1 | 1578 |
__ movl(count_arg, count); // update 'count' |
1579 |
__ jump(RuntimeAddress(short_copy_entry)); |
|
1580 |
||
1581 |
__ BIND(L_int_aligned); |
|
1066 | 1582 |
__ shrptr(count, LogBytesPerInt); // size => int_count |
1 | 1583 |
__ movl(count_arg, count); // update 'count' |
1584 |
__ jump(RuntimeAddress(int_copy_entry)); |
|
1585 |
||
1586 |
__ BIND(L_long_aligned); |
|
1066 | 1587 |
__ shrptr(count, LogBytesPerLong); // size => qword_count |
1 | 1588 |
__ movl(count_arg, count); // update 'count' |
1066 | 1589 |
__ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it. |
1590 |
__ pop(rsi); |
|
1 | 1591 |
__ jump(RuntimeAddress(long_copy_entry)); |
1592 |
||
1593 |
return start; |
|
1594 |
} |
|
1595 |
||
1596 |
||
1597 |
// Perform range checks on the proposed arraycopy. |
|
1598 |
// Smashes src_pos and dst_pos. (Uses them up for temps.) |
|
1599 |
void arraycopy_range_checks(Register src, |
|
1600 |
Register src_pos, |
|
1601 |
Register dst, |
|
1602 |
Register dst_pos, |
|
1603 |
Address& length, |
|
1604 |
Label& L_failed) { |
|
1605 |
BLOCK_COMMENT("arraycopy_range_checks:"); |
|
1606 |
const Register src_end = src_pos; // source array end position |
|
1607 |
const Register dst_end = dst_pos; // destination array end position |
|
1608 |
__ addl(src_end, length); // src_pos + length |
|
1609 |
__ addl(dst_end, length); // dst_pos + length |
|
1610 |
||
1611 |
// if (src_pos + length > arrayOop(src)->length() ) FAIL; |
|
1612 |
__ cmpl(src_end, Address(src, arrayOopDesc::length_offset_in_bytes())); |
|
1613 |
__ jcc(Assembler::above, L_failed); |
|
1614 |
||
1615 |
// if (dst_pos + length > arrayOop(dst)->length() ) FAIL; |
|
1616 |
__ cmpl(dst_end, Address(dst, arrayOopDesc::length_offset_in_bytes())); |
|
1617 |
__ jcc(Assembler::above, L_failed); |
|
1618 |
||
1619 |
BLOCK_COMMENT("arraycopy_range_checks done"); |
|
1620 |
} |
|
1621 |
||
1622 |
||
1623 |
// |
|
1624 |
// Generate generic array copy stubs |
|
1625 |
// |
|
1626 |
// Input: |
|
1627 |
// 4(rsp) - src oop |
|
1628 |
// 8(rsp) - src_pos |
|
1629 |
// 12(rsp) - dst oop |
|
1630 |
// 16(rsp) - dst_pos |
|
1631 |
// 20(rsp) - element count |
|
1632 |
// |
|
1633 |
// Output: |
|
1634 |
// rax, == 0 - success |
|
1635 |
// rax, == -1^K - failure, where K is partial transfer count |
|
1636 |
// |
|
1637 |
address generate_generic_copy(const char *name, |
|
1638 |
address entry_jbyte_arraycopy, |
|
1639 |
address entry_jshort_arraycopy, |
|
1640 |
address entry_jint_arraycopy, |
|
1641 |
address entry_oop_arraycopy, |
|
1642 |
address entry_jlong_arraycopy, |
|
1643 |
address entry_checkcast_arraycopy) { |
|
1644 |
Label L_failed, L_failed_0, L_objArray; |
|
1645 |
||
1646 |
{ int modulus = CodeEntryAlignment; |
|
1647 |
int target = modulus - 5; // 5 = sizeof jmp(L_failed) |
|
1648 |
int advance = target - (__ offset() % modulus); |
|
1649 |
if (advance < 0) advance += modulus; |
|
1650 |
if (advance > 0) __ nop(advance); |
|
1651 |
} |
|
1652 |
StubCodeMark mark(this, "StubRoutines", name); |
|
1653 |
||
1654 |
// Short-hop target to L_failed. Makes for denser prologue code. |
|
1655 |
__ BIND(L_failed_0); |
|
1656 |
__ jmp(L_failed); |
|
1657 |
assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed"); |
|
1658 |
||
1659 |
__ align(CodeEntryAlignment); |
|
1660 |
address start = __ pc(); |
|
1661 |
||
1662 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
1066 | 1663 |
__ push(rsi); |
1664 |
__ push(rdi); |
|
1 | 1665 |
|
1666 |
// bump this on entry, not on exit: |
|
1667 |
inc_counter_np(SharedRuntime::_generic_array_copy_ctr); |
|
1668 |
||
1669 |
// Input values |
|
1670 |
Address SRC (rsp, 12+ 4); |
|
1671 |
Address SRC_POS (rsp, 12+ 8); |
|
1672 |
Address DST (rsp, 12+12); |
|
1673 |
Address DST_POS (rsp, 12+16); |
|
1674 |
Address LENGTH (rsp, 12+20); |
|
1675 |
||
1676 |
//----------------------------------------------------------------------- |
|
1677 |
// Assembler stub will be used for this call to arraycopy |
|
1678 |
// if the following conditions are met: |
|
1679 |
// |
|
1680 |
// (1) src and dst must not be null. |
|
1681 |
// (2) src_pos must not be negative. |
|
1682 |
// (3) dst_pos must not be negative. |
|
1683 |
// (4) length must not be negative. |
|
1684 |
// (5) src klass and dst klass should be the same and not NULL. |
|
1685 |
// (6) src and dst should be arrays. |
|
1686 |
// (7) src_pos + length must not exceed length of src. |
|
1687 |
// (8) dst_pos + length must not exceed length of dst. |
|
1688 |
// |
|
1689 |
||
1690 |
const Register src = rax; // source array oop |
|
1691 |
const Register src_pos = rsi; |
|
1692 |
const Register dst = rdx; // destination array oop |
|
1693 |
const Register dst_pos = rdi; |
|
1694 |
const Register length = rcx; // transfer count |
|
1695 |
||
1696 |
// if (src == NULL) return -1; |
|
1066 | 1697 |
__ movptr(src, SRC); // src oop |
1698 |
__ testptr(src, src); |
|
1 | 1699 |
__ jccb(Assembler::zero, L_failed_0); |
1700 |
||
1701 |
// if (src_pos < 0) return -1; |
|
1066 | 1702 |
__ movl2ptr(src_pos, SRC_POS); // src_pos |
1 | 1703 |
__ testl(src_pos, src_pos); |
1704 |
__ jccb(Assembler::negative, L_failed_0); |
|
1705 |
||
1706 |
// if (dst == NULL) return -1; |
|
1066 | 1707 |
__ movptr(dst, DST); // dst oop |
1708 |
__ testptr(dst, dst); |
|
1 | 1709 |
__ jccb(Assembler::zero, L_failed_0); |
1710 |
||
1711 |
// if (dst_pos < 0) return -1; |
|
1066 | 1712 |
__ movl2ptr(dst_pos, DST_POS); // dst_pos |
1 | 1713 |
__ testl(dst_pos, dst_pos); |
1714 |
__ jccb(Assembler::negative, L_failed_0); |
|
1715 |
||
1716 |
// if (length < 0) return -1; |
|
1066 | 1717 |
__ movl2ptr(length, LENGTH); // length |
1 | 1718 |
__ testl(length, length); |
1719 |
__ jccb(Assembler::negative, L_failed_0); |
|
1720 |
||
1721 |
// if (src->klass() == NULL) return -1; |
|
1722 |
Address src_klass_addr(src, oopDesc::klass_offset_in_bytes()); |
|
1723 |
Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes()); |
|
1724 |
const Register rcx_src_klass = rcx; // array klass |
|
1066 | 1725 |
__ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes())); |
1 | 1726 |
|
1727 |
#ifdef ASSERT |
|
1728 |
// assert(src->klass() != NULL); |
|
1729 |
BLOCK_COMMENT("assert klasses not null"); |
|
1730 |
{ Label L1, L2; |
|
1066 | 1731 |
__ testptr(rcx_src_klass, rcx_src_klass); |
1 | 1732 |
__ jccb(Assembler::notZero, L2); // it is broken if klass is NULL |
1733 |
__ bind(L1); |
|
1734 |
__ stop("broken null klass"); |
|
1735 |
__ bind(L2); |
|
1066 | 1736 |
__ cmpptr(dst_klass_addr, (int32_t)NULL_WORD); |
1 | 1737 |
__ jccb(Assembler::equal, L1); // this would be broken also |
1738 |
BLOCK_COMMENT("assert done"); |
|
1739 |
} |
|
1740 |
#endif //ASSERT |
|
1741 |
||
1742 |
// Load layout helper (32-bits) |
|
1743 |
// |
|
1744 |
// |array_tag| | header_size | element_type | |log2_element_size| |
|
1745 |
// 32 30 24 16 8 2 0 |
|
1746 |
// |
|
1747 |
// array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 |
|
1748 |
// |
|
1749 |
||
11430
718fc06da49a
7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
stefank
parents:
10565
diff
changeset
|
1750 |
int lh_offset = in_bytes(Klass::layout_helper_offset()); |
1 | 1751 |
Address src_klass_lh_addr(rcx_src_klass, lh_offset); |
1752 |
||
1753 |
// Handle objArrays completely differently... |
|
1754 |
jint objArray_lh = Klass::array_layout_helper(T_OBJECT); |
|
1755 |
__ cmpl(src_klass_lh_addr, objArray_lh); |
|
1756 |
__ jcc(Assembler::equal, L_objArray); |
|
1757 |
||
1758 |
// if (src->klass() != dst->klass()) return -1; |
|
1066 | 1759 |
__ cmpptr(rcx_src_klass, dst_klass_addr); |
1 | 1760 |
__ jccb(Assembler::notEqual, L_failed_0); |
1761 |
||
1762 |
const Register rcx_lh = rcx; // layout helper |
|
1763 |
assert(rcx_lh == rcx_src_klass, "known alias"); |
|
1764 |
__ movl(rcx_lh, src_klass_lh_addr); |
|
1765 |
||
1766 |
// if (!src->is_Array()) return -1; |
|
1767 |
__ cmpl(rcx_lh, Klass::_lh_neutral_value); |
|
1768 |
__ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp |
|
1769 |
||
1770 |
// At this point, it is known to be a typeArray (array_tag 0x3). |
|
1771 |
#ifdef ASSERT |
|
1772 |
{ Label L; |
|
1773 |
__ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); |
|
1774 |
__ jcc(Assembler::greaterEqual, L); // signed cmp |
|
1775 |
__ stop("must be a primitive array"); |
|
1776 |
__ bind(L); |
|
1777 |
} |
|
1778 |
#endif |
|
1779 |
||
1780 |
assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh); |
|
1781 |
arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); |
|
1782 |
||
13952
e3cf184080bc
8000213: NPG: Should have renamed arrayKlass and typeArrayKlass
coleenp
parents:
13728
diff
changeset
|
1783 |
// TypeArrayKlass |
1 | 1784 |
// |
1785 |
// src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); |
|
1786 |
// dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); |
|
1787 |
// |
|
1788 |
const Register rsi_offset = rsi; // array offset |
|
1789 |
const Register src_array = src; // src array offset |
|
1790 |
const Register dst_array = dst; // dst array offset |
|
1791 |
const Register rdi_elsize = rdi; // log2 element size |
|
1792 |
||
1066 | 1793 |
__ mov(rsi_offset, rcx_lh); |
1794 |
__ shrptr(rsi_offset, Klass::_lh_header_size_shift); |
|
1795 |
__ andptr(rsi_offset, Klass::_lh_header_size_mask); // array_offset |
|
1796 |
__ addptr(src_array, rsi_offset); // src array offset |
|
1797 |
__ addptr(dst_array, rsi_offset); // dst array offset |
|
1798 |
__ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize |
|
1 | 1799 |
|
1800 |
// next registers should be set before the jump to corresponding stub |
|
1801 |
const Register from = src; // source array address |
|
1802 |
const Register to = dst; // destination array address |
|
1803 |
const Register count = rcx; // elements count |
|
1804 |
// some of them should be duplicated on stack |
|
1805 |
#define FROM Address(rsp, 12+ 4) |
|
1806 |
#define TO Address(rsp, 12+ 8) // Not used now |
|
1807 |
#define COUNT Address(rsp, 12+12) // Only for oop arraycopy |
|
1808 |
||
1809 |
BLOCK_COMMENT("scale indexes to element size"); |
|
1066 | 1810 |
__ movl2ptr(rsi, SRC_POS); // src_pos |
1811 |
__ shlptr(rsi); // src_pos << rcx (log2 elsize) |
|
1 | 1812 |
assert(src_array == from, ""); |
1066 | 1813 |
__ addptr(from, rsi); // from = src_array + SRC_POS << log2 elsize |
1814 |
__ movl2ptr(rdi, DST_POS); // dst_pos |
|
1815 |
__ shlptr(rdi); // dst_pos << rcx (log2 elsize) |
|
1 | 1816 |
assert(dst_array == to, ""); |
1066 | 1817 |
__ addptr(to, rdi); // to = dst_array + DST_POS << log2 elsize |
1818 |
__ movptr(FROM, from); // src_addr |
|
1819 |
__ mov(rdi_elsize, rcx_lh); // log2 elsize |
|
1820 |
__ movl2ptr(count, LENGTH); // elements count |
|
1 | 1821 |
|
1822 |
BLOCK_COMMENT("choose copy loop based on element size"); |
|
1823 |
__ cmpl(rdi_elsize, 0); |
|
1824 |
||
1825 |
__ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy)); |
|
1826 |
__ cmpl(rdi_elsize, LogBytesPerShort); |
|
1827 |
__ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy)); |
|
1828 |
__ cmpl(rdi_elsize, LogBytesPerInt); |
|
1829 |
__ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy)); |
|
1830 |
#ifdef ASSERT |
|
1831 |
__ cmpl(rdi_elsize, LogBytesPerLong); |
|
1832 |
__ jccb(Assembler::notEqual, L_failed); |
|
1833 |
#endif |
|
1066 | 1834 |
__ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it. |
1835 |
__ pop(rsi); |
|
1 | 1836 |
__ jump(RuntimeAddress(entry_jlong_arraycopy)); |
1837 |
||
1838 |
__ BIND(L_failed); |
|
1066 | 1839 |
__ xorptr(rax, rax); |
1840 |
__ notptr(rax); // return -1 |
|
1841 |
__ pop(rdi); |
|
1842 |
__ pop(rsi); |
|
1 | 1843 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
1844 |
__ ret(0); |
|
1845 |
||
13952
e3cf184080bc
8000213: NPG: Should have renamed arrayKlass and typeArrayKlass
coleenp
parents:
13728
diff
changeset
|
1846 |
// ObjArrayKlass |
1 | 1847 |
__ BIND(L_objArray); |
1848 |
// live at this point: rcx_src_klass, src[_pos], dst[_pos] |
|
1849 |
||
1850 |
Label L_plain_copy, L_checkcast_copy; |
|
1851 |
// test array classes for subtyping |
|
1066 | 1852 |
__ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality |
1 | 1853 |
__ jccb(Assembler::notEqual, L_checkcast_copy); |
1854 |
||
1855 |
// Identically typed arrays can be copied without element-wise checks. |
|
1856 |
assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass); |
|
1857 |
arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); |
|
1858 |
||
1859 |
__ BIND(L_plain_copy); |
|
1066 | 1860 |
__ movl2ptr(count, LENGTH); // elements count |
1861 |
__ movl2ptr(src_pos, SRC_POS); // reload src_pos |
|
1862 |
__ lea(from, Address(src, src_pos, Address::times_ptr, |
|
1863 |
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr |
|
1864 |
__ movl2ptr(dst_pos, DST_POS); // reload dst_pos |
|
1865 |
__ lea(to, Address(dst, dst_pos, Address::times_ptr, |
|
1866 |
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr |
|
1867 |
__ movptr(FROM, from); // src_addr |
|
1868 |
__ movptr(TO, to); // dst_addr |
|
1 | 1869 |
__ movl(COUNT, count); // count |
1870 |
__ jump(RuntimeAddress(entry_oop_arraycopy)); |
|
1871 |
||
1872 |
__ BIND(L_checkcast_copy); |
|
1873 |
// live at this point: rcx_src_klass, dst[_pos], src[_pos] |
|
1874 |
{ |
|
1875 |
// Handy offsets: |
|
13952
e3cf184080bc
8000213: NPG: Should have renamed arrayKlass and typeArrayKlass
coleenp
parents:
13728
diff
changeset
|
1876 |
int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); |
11430
718fc06da49a
7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
stefank
parents:
10565
diff
changeset
|
1877 |
int sco_offset = in_bytes(Klass::super_check_offset_offset()); |
1 | 1878 |
|
1879 |
Register rsi_dst_klass = rsi; |
|
1880 |
Register rdi_temp = rdi; |
|
1881 |
assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos"); |
|
1882 |
assert(rdi_temp == dst_pos, "expected alias w/ dst_pos"); |
|
1883 |
Address dst_klass_lh_addr(rsi_dst_klass, lh_offset); |
|
1884 |
||
1885 |
// Before looking at dst.length, make sure dst is also an objArray. |
|
1066 | 1886 |
__ movptr(rsi_dst_klass, dst_klass_addr); |
1 | 1887 |
__ cmpl(dst_klass_lh_addr, objArray_lh); |
1888 |
__ jccb(Assembler::notEqual, L_failed); |
|
1889 |
||
1890 |
// It is safe to examine both src.length and dst.length. |
|
1066 | 1891 |
__ movl2ptr(src_pos, SRC_POS); // reload rsi |
1 | 1892 |
arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); |
1893 |
// (Now src_pos and dst_pos are killed, but not src and dst.) |
|
1894 |
||
1895 |
// We'll need this temp (don't forget to pop it after the type check). |
|
1066 | 1896 |
__ push(rbx); |
1 | 1897 |
Register rbx_src_klass = rbx; |
1898 |
||
1066 | 1899 |
__ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx |
1900 |
__ movptr(rsi_dst_klass, dst_klass_addr); |
|
1 | 1901 |
Address super_check_offset_addr(rsi_dst_klass, sco_offset); |
1902 |
Label L_fail_array_check; |
|
1903 |
generate_type_check(rbx_src_klass, |
|
1904 |
super_check_offset_addr, dst_klass_addr, |
|
1905 |
rdi_temp, NULL, &L_fail_array_check); |
|
1906 |
// (On fall-through, we have passed the array type check.) |
|
1066 | 1907 |
__ pop(rbx); |
1 | 1908 |
__ jmp(L_plain_copy); |
1909 |
||
1910 |
__ BIND(L_fail_array_check); |
|
1911 |
// Reshuffle arguments so we can call checkcast_arraycopy: |
|
1912 |
||
1913 |
// match initial saves for checkcast_arraycopy |
|
1066 | 1914 |
// push(rsi); // already done; see above |
1915 |
// push(rdi); // already done; see above |
|
1916 |
// push(rbx); // already done; see above |
|
1 | 1917 |
|
1918 |
// Marshal outgoing arguments now, freeing registers. |
|
1919 |
Address from_arg(rsp, 16+ 4); // from |
|
1920 |
Address to_arg(rsp, 16+ 8); // to |
|
1921 |
Address length_arg(rsp, 16+12); // elements count |
|
1922 |
Address ckoff_arg(rsp, 16+16); // super_check_offset |
|
1923 |
Address ckval_arg(rsp, 16+20); // super_klass |
|
1924 |
||
1925 |
Address SRC_POS_arg(rsp, 16+ 8); |
|
1926 |
Address DST_POS_arg(rsp, 16+16); |
|
1927 |
Address LENGTH_arg(rsp, 16+20); |
|
1928 |
// push rbx, changed the incoming offsets (why not just use rbp,??) |
|
1929 |
// assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, ""); |
|
1930 |
||
1066 | 1931 |
__ movptr(rbx, Address(rsi_dst_klass, ek_offset)); |
1932 |
__ movl2ptr(length, LENGTH_arg); // reload elements count |
|
1933 |
__ movl2ptr(src_pos, SRC_POS_arg); // reload src_pos |
|
1934 |
__ movl2ptr(dst_pos, DST_POS_arg); // reload dst_pos |
|
1 | 1935 |
|
1066 | 1936 |
__ movptr(ckval_arg, rbx); // destination element type |
1 | 1937 |
__ movl(rbx, Address(rbx, sco_offset)); |
1938 |
__ movl(ckoff_arg, rbx); // corresponding class check offset |
|
1939 |
||
1940 |
__ movl(length_arg, length); // outgoing length argument |
|
1941 |
||
1066 | 1942 |
__ lea(from, Address(src, src_pos, Address::times_ptr, |
1 | 1943 |
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); |
1066 | 1944 |
__ movptr(from_arg, from); |
1 | 1945 |
|
1066 | 1946 |
__ lea(to, Address(dst, dst_pos, Address::times_ptr, |
1 | 1947 |
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); |
1066 | 1948 |
__ movptr(to_arg, to); |
1 | 1949 |
__ jump(RuntimeAddress(entry_checkcast_arraycopy)); |
1950 |
} |
|
1951 |
||
1952 |
return start; |
|
1953 |
} |
|
1954 |
||
1955 |
void generate_arraycopy_stubs() { |
|
1956 |
address entry; |
|
1957 |
address entry_jbyte_arraycopy; |
|
1958 |
address entry_jshort_arraycopy; |
|
1959 |
address entry_jint_arraycopy; |
|
1960 |
address entry_oop_arraycopy; |
|
1961 |
address entry_jlong_arraycopy; |
|
1962 |
address entry_checkcast_arraycopy; |
|
1963 |
||
1964 |
StubRoutines::_arrayof_jbyte_disjoint_arraycopy = |
|
1965 |
generate_disjoint_copy(T_BYTE, true, Address::times_1, &entry, |
|
1966 |
"arrayof_jbyte_disjoint_arraycopy"); |
|
1967 |
StubRoutines::_arrayof_jbyte_arraycopy = |
|
1968 |
generate_conjoint_copy(T_BYTE, true, Address::times_1, entry, |
|
1969 |
NULL, "arrayof_jbyte_arraycopy"); |
|
1970 |
StubRoutines::_jbyte_disjoint_arraycopy = |
|
1971 |
generate_disjoint_copy(T_BYTE, false, Address::times_1, &entry, |
|
1972 |
"jbyte_disjoint_arraycopy"); |
|
1973 |
StubRoutines::_jbyte_arraycopy = |
|
1974 |
generate_conjoint_copy(T_BYTE, false, Address::times_1, entry, |
|
1975 |
&entry_jbyte_arraycopy, "jbyte_arraycopy"); |
|
1976 |
||
1977 |
StubRoutines::_arrayof_jshort_disjoint_arraycopy = |
|
1978 |
generate_disjoint_copy(T_SHORT, true, Address::times_2, &entry, |
|
1979 |
"arrayof_jshort_disjoint_arraycopy"); |
|
1980 |
StubRoutines::_arrayof_jshort_arraycopy = |
|
1981 |
generate_conjoint_copy(T_SHORT, true, Address::times_2, entry, |
|
1982 |
NULL, "arrayof_jshort_arraycopy"); |
|
1983 |
StubRoutines::_jshort_disjoint_arraycopy = |
|
1984 |
generate_disjoint_copy(T_SHORT, false, Address::times_2, &entry, |
|
1985 |
"jshort_disjoint_arraycopy"); |
|
1986 |
StubRoutines::_jshort_arraycopy = |
|
1987 |
generate_conjoint_copy(T_SHORT, false, Address::times_2, entry, |
|
1988 |
&entry_jshort_arraycopy, "jshort_arraycopy"); |
|
1989 |
||
1990 |
// Next arrays are always aligned on 4 bytes at least. |
|
1991 |
StubRoutines::_jint_disjoint_arraycopy = |
|
1992 |
generate_disjoint_copy(T_INT, true, Address::times_4, &entry, |
|
1993 |
"jint_disjoint_arraycopy"); |
|
1994 |
StubRoutines::_jint_arraycopy = |
|
1995 |
generate_conjoint_copy(T_INT, true, Address::times_4, entry, |
|
1996 |
&entry_jint_arraycopy, "jint_arraycopy"); |
|
1997 |
||
1998 |
StubRoutines::_oop_disjoint_arraycopy = |
|
1066 | 1999 |
generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry, |
1 | 2000 |
"oop_disjoint_arraycopy"); |
2001 |
StubRoutines::_oop_arraycopy = |
|
1066 | 2002 |
generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry, |
1 | 2003 |
&entry_oop_arraycopy, "oop_arraycopy"); |
2004 |
||
8498 | 2005 |
StubRoutines::_oop_disjoint_arraycopy_uninit = |
2006 |
generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry, |
|
2007 |
"oop_disjoint_arraycopy_uninit", |
|
2008 |
/*dest_uninitialized*/true); |
|
2009 |
StubRoutines::_oop_arraycopy_uninit = |
|
2010 |
generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry, |
|
2011 |
NULL, "oop_arraycopy_uninit", |
|
2012 |
/*dest_uninitialized*/true); |
|
2013 |
||
1 | 2014 |
StubRoutines::_jlong_disjoint_arraycopy = |
2015 |
generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy"); |
|
2016 |
StubRoutines::_jlong_arraycopy = |
|
2017 |
generate_conjoint_long_copy(entry, &entry_jlong_arraycopy, |
|
2018 |
"jlong_arraycopy"); |
|
2019 |
||
6433 | 2020 |
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); |
2021 |
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); |
|
2022 |
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); |
|
2023 |
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); |
|
2024 |
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); |
|
2025 |
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); |
|
2026 |
||
8498 | 2027 |
StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; |
2028 |
StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; |
|
2029 |
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; |
|
2030 |
StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; |
|
1 | 2031 |
|
8498 | 2032 |
StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; |
2033 |
StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; |
|
2034 |
StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; |
|
2035 |
StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; |
|
1 | 2036 |
|
2037 |
StubRoutines::_checkcast_arraycopy = |
|
8498 | 2038 |
generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); |
2039 |
StubRoutines::_checkcast_arraycopy_uninit = |
|
2040 |
generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true); |
|
1 | 2041 |
|
2042 |
StubRoutines::_unsafe_arraycopy = |
|
2043 |
generate_unsafe_copy("unsafe_arraycopy", |
|
2044 |
entry_jbyte_arraycopy, |
|
2045 |
entry_jshort_arraycopy, |
|
2046 |
entry_jint_arraycopy, |
|
2047 |
entry_jlong_arraycopy); |
|
2048 |
||
2049 |
StubRoutines::_generic_arraycopy = |
|
2050 |
generate_generic_copy("generic_arraycopy", |
|
2051 |
entry_jbyte_arraycopy, |
|
2052 |
entry_jshort_arraycopy, |
|
2053 |
entry_jint_arraycopy, |
|
2054 |
entry_oop_arraycopy, |
|
2055 |
entry_jlong_arraycopy, |
|
2056 |
entry_checkcast_arraycopy); |
|
2057 |
} |
|
2058 |
||
14132 | 2059 |
// AES intrinsic stubs

// Step, in bytes, by which the CBC loops advance their position and
// decrease the remaining length after each 16-byte movdqu load/store.
enum { AESBlockSize = 16 };
|
2061 |
||
2062 |
// Emits a 16-byte-aligned data table made of the four 32-bit words
// 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f (in that order) and returns
// the address of its first word.
// NOTE(review): presumably this is the table later reached through
// StubRoutines::x86::key_shuffle_mask_addr() and used as a pshufb mask --
// confirm where the returned address is stored.
address generate_key_shuffle_mask() {
  const int mask_words[4] = {
    0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
  };

  __ align(16);
  StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
  address start = __ pc();
  for (int i = 0; i < 4; i++) {
    __ emit_data(mask_words[i], relocInfo::none, 0);
  }
  return start;
}
|
2072 |
||
35154 | 2073 |
// Emits a 16-byte-aligned data table made of the four 32-bit words
// 0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203 (in that order) and returns
// the address of its first word.
// NOTE(review): judging by the stub name this is a pshufb mask for the CTR
// mode counter -- confirm against the code that consumes the address.
address generate_counter_shuffle_mask() {
  const int mask_words[4] = {
    0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203
  };

  __ align(16);
  StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask");
  address start = __ pc();
  for (int i = 0; i < 4; i++) {
    __ emit_data(mask_words[i], relocInfo::none, 0);
  }
  return start;
}
|
2083 |
||
14132 | 2084 |
// Utility routine for loading a 128-bit key word in little endian format |
2085 |
// can optionally specify that the shuffle mask is already in an xmmregister |
|
2086 |
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { |
|
2087 |
__ movdqu(xmmdst, Address(key, offset)); |
|
2088 |
if (xmm_shuf_mask != NULL) { |
|
2089 |
__ pshufb(xmmdst, xmm_shuf_mask); |
|
2090 |
} else { |
|
2091 |
__ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2092 |
} |
|
2093 |
} |
|
2094 |
||
2095 |
// aesenc using specified key+offset |
|
2096 |
// can optionally specify that the shuffle mask is already in an xmmregister |
|
2097 |
void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { |
|
2098 |
load_key(xmmtmp, key, offset, xmm_shuf_mask); |
|
2099 |
__ aesenc(xmmdst, xmmtmp); |
|
2100 |
} |
|
2101 |
||
2102 |
// aesdec using specified key+offset |
|
2103 |
// can optionally specify that the shuffle mask is already in an xmmregister |
|
2104 |
void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { |
|
2105 |
load_key(xmmtmp, key, offset, xmm_shuf_mask); |
|
2106 |
__ aesdec(xmmdst, xmmtmp); |
|
2107 |
} |
|
2108 |
||
35154 | 2109 |
// Utility routine for increase 128bit counter (iv in CTR mode) |
2110 |
// XMM_128bit, D3, D2, D1, D0 |
|
2111 |
void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) { |
|
2112 |
__ pextrd(reg, xmmdst, 0x0); |
|
2113 |
__ addl(reg, inc_delta); |
|
2114 |
__ pinsrd(xmmdst, reg, 0x0); |
|
2115 |
__ jcc(Assembler::carryClear, next_block); // jump if no carry |
|
2116 |
||
2117 |
__ pextrd(reg, xmmdst, 0x01); // Carry-> D1 |
|
2118 |
__ addl(reg, 0x01); |
|
2119 |
__ pinsrd(xmmdst, reg, 0x01); |
|
2120 |
__ jcc(Assembler::carryClear, next_block); // jump if no carry |
|
2121 |
||
2122 |
__ pextrd(reg, xmmdst, 0x02); // Carry-> D2 |
|
2123 |
__ addl(reg, 0x01); |
|
2124 |
__ pinsrd(xmmdst, reg, 0x02); |
|
2125 |
__ jcc(Assembler::carryClear, next_block); // jump if no carry |
|
2126 |
||
2127 |
__ pextrd(reg, xmmdst, 0x03); // Carry -> D3 |
|
2128 |
__ addl(reg, 0x01); |
|
2129 |
__ pinsrd(xmmdst, reg, 0x03); |
|
2130 |
||
2131 |
__ BIND(next_block); // next instruction |
|
2132 |
} |
|
2133 |
||
14132 | 2134 |
|
2135 |
// Arguments: |
|
2136 |
// |
|
2137 |
// Inputs: |
|
2138 |
// c_rarg0 - source byte array address |
|
2139 |
// c_rarg1 - destination byte array address |
|
2140 |
// c_rarg2 - K (key) in little endian int array |
|
2141 |
// |
|
2142 |
address generate_aescrypt_encryptBlock() { |
|
14834 | 2143 |
assert(UseAES, "need AES instructions and misaligned SSE support"); |
14132 | 2144 |
__ align(CodeEntryAlignment); |
2145 |
StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); |
|
2146 |
Label L_doLast; |
|
2147 |
address start = __ pc(); |
|
2148 |
||
14834 | 2149 |
const Register from = rdx; // source array address |
14132 | 2150 |
const Register to = rdx; // destination array address |
2151 |
const Register key = rcx; // key array address |
|
2152 |
const Register keylen = rax; |
|
2153 |
const Address from_param(rbp, 8+0); |
|
2154 |
const Address to_param (rbp, 8+4); |
|
2155 |
const Address key_param (rbp, 8+8); |
|
2156 |
||
2157 |
const XMMRegister xmm_result = xmm0; |
|
14834 | 2158 |
const XMMRegister xmm_key_shuf_mask = xmm1; |
2159 |
const XMMRegister xmm_temp1 = xmm2; |
|
2160 |
const XMMRegister xmm_temp2 = xmm3; |
|
2161 |
const XMMRegister xmm_temp3 = xmm4; |
|
2162 |
const XMMRegister xmm_temp4 = xmm5; |
|
2163 |
||
2164 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
32727
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2165 |
|
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2166 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2167 |
// context for the registers used, where all instructions below are using 128-bit mode |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2168 |
// On EVEX without VL and BW, these instructions will all be AVX. |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2169 |
if (VM_Version::supports_avx512vlbw()) { |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2170 |
__ movl(rdx, 0xffff); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2171 |
__ kmovdl(k1, rdx); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2172 |
} |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2173 |
|
14834 | 2174 |
__ movptr(from, from_param); |
2175 |
__ movptr(key, key_param); |
|
2176 |
||
2177 |
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60} |
|
14132 | 2178 |
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2179 |
||
2180 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2181 |
__ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input |
|
14834 | 2182 |
__ movptr(to, to_param); |
14132 | 2183 |
|
2184 |
// For encryption, the java expanded key ordering is just what we need |
|
2185 |
||
14834 | 2186 |
load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); |
2187 |
__ pxor(xmm_result, xmm_temp1); |
|
2188 |
||
2189 |
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); |
|
2190 |
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); |
|
2191 |
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); |
|
2192 |
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); |
|
2193 |
||
2194 |
__ aesenc(xmm_result, xmm_temp1); |
|
2195 |
__ aesenc(xmm_result, xmm_temp2); |
|
2196 |
__ aesenc(xmm_result, xmm_temp3); |
|
2197 |
__ aesenc(xmm_result, xmm_temp4); |
|
2198 |
||
2199 |
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); |
|
2200 |
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); |
|
2201 |
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); |
|
2202 |
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); |
|
2203 |
||
2204 |
__ aesenc(xmm_result, xmm_temp1); |
|
2205 |
__ aesenc(xmm_result, xmm_temp2); |
|
2206 |
__ aesenc(xmm_result, xmm_temp3); |
|
2207 |
__ aesenc(xmm_result, xmm_temp4); |
|
2208 |
||
2209 |
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); |
|
2210 |
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); |
|
2211 |
||
2212 |
__ cmpl(keylen, 44); |
|
2213 |
__ jccb(Assembler::equal, L_doLast); |
|
2214 |
||
2215 |
__ aesenc(xmm_result, xmm_temp1); |
|
2216 |
__ aesenc(xmm_result, xmm_temp2); |
|
2217 |
||
2218 |
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); |
|
2219 |
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); |
|
2220 |
||
2221 |
__ cmpl(keylen, 52); |
|
2222 |
__ jccb(Assembler::equal, L_doLast); |
|
2223 |
||
2224 |
__ aesenc(xmm_result, xmm_temp1); |
|
2225 |
__ aesenc(xmm_result, xmm_temp2); |
|
2226 |
||
2227 |
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); |
|
2228 |
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); |
|
14132 | 2229 |
|
2230 |
__ BIND(L_doLast); |
|
14834 | 2231 |
__ aesenc(xmm_result, xmm_temp1); |
2232 |
__ aesenclast(xmm_result, xmm_temp2); |
|
14132 | 2233 |
__ movdqu(Address(to, 0), xmm_result); // store the result |
2234 |
__ xorptr(rax, rax); // return 0 |
|
2235 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
|
2236 |
__ ret(0); |
|
2237 |
||
2238 |
return start; |
|
2239 |
} |
|
2240 |
||
2241 |
||
2242 |
// Arguments: |
|
2243 |
// |
|
2244 |
// Inputs: |
|
2245 |
// c_rarg0 - source byte array address |
|
2246 |
// c_rarg1 - destination byte array address |
|
2247 |
// c_rarg2 - K (key) in little endian int array |
|
2248 |
// |
|
2249 |
address generate_aescrypt_decryptBlock() { |
|
14834 | 2250 |
assert(UseAES, "need AES instructions and misaligned SSE support"); |
14132 | 2251 |
__ align(CodeEntryAlignment); |
2252 |
StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); |
|
2253 |
Label L_doLast; |
|
2254 |
address start = __ pc(); |
|
2255 |
||
14834 | 2256 |
const Register from = rdx; // source array address |
14132 | 2257 |
const Register to = rdx; // destination array address |
2258 |
const Register key = rcx; // key array address |
|
2259 |
const Register keylen = rax; |
|
2260 |
const Address from_param(rbp, 8+0); |
|
2261 |
const Address to_param (rbp, 8+4); |
|
2262 |
const Address key_param (rbp, 8+8); |
|
2263 |
||
2264 |
const XMMRegister xmm_result = xmm0; |
|
14834 | 2265 |
const XMMRegister xmm_key_shuf_mask = xmm1; |
2266 |
const XMMRegister xmm_temp1 = xmm2; |
|
2267 |
const XMMRegister xmm_temp2 = xmm3; |
|
2268 |
const XMMRegister xmm_temp3 = xmm4; |
|
2269 |
const XMMRegister xmm_temp4 = xmm5; |
|
14132 | 2270 |
|
2271 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
32727
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2272 |
|
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2273 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2274 |
// context for the registers used, where all instructions below are using 128-bit mode |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2275 |
// On EVEX without VL and BW, these instructions will all be AVX. |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2276 |
if (VM_Version::supports_avx512vlbw()) { |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2277 |
__ movl(rdx, 0xffff); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2278 |
__ kmovdl(k1, rdx); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2279 |
} |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2280 |
|
14834 | 2281 |
__ movptr(from, from_param); |
2282 |
__ movptr(key, key_param); |
|
2283 |
||
2284 |
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60} |
|
14132 | 2285 |
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2286 |
||
2287 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2288 |
__ movdqu(xmm_result, Address(from, 0)); |
|
14834 | 2289 |
__ movptr(to, to_param); |
14132 | 2290 |
|
2291 |
// for decryption java expanded key ordering is rotated one position from what we want |
|
2292 |
// so we start from 0x10 here and hit 0x00 last |
|
2293 |
// we don't know if the key is aligned, hence not using load-execute form |
|
14834 | 2294 |
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); |
2295 |
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); |
|
2296 |
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); |
|
2297 |
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); |
|
2298 |
||
2299 |
__ pxor (xmm_result, xmm_temp1); |
|
2300 |
__ aesdec(xmm_result, xmm_temp2); |
|
2301 |
__ aesdec(xmm_result, xmm_temp3); |
|
2302 |
__ aesdec(xmm_result, xmm_temp4); |
|
2303 |
||
2304 |
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); |
|
2305 |
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); |
|
2306 |
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); |
|
2307 |
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); |
|
2308 |
||
2309 |
__ aesdec(xmm_result, xmm_temp1); |
|
2310 |
__ aesdec(xmm_result, xmm_temp2); |
|
2311 |
__ aesdec(xmm_result, xmm_temp3); |
|
2312 |
__ aesdec(xmm_result, xmm_temp4); |
|
2313 |
||
2314 |
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); |
|
2315 |
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); |
|
2316 |
load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); |
|
2317 |
||
2318 |
__ cmpl(keylen, 44); |
|
2319 |
__ jccb(Assembler::equal, L_doLast); |
|
2320 |
||
2321 |
__ aesdec(xmm_result, xmm_temp1); |
|
2322 |
__ aesdec(xmm_result, xmm_temp2); |
|
2323 |
||
2324 |
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); |
|
2325 |
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); |
|
2326 |
||
2327 |
__ cmpl(keylen, 52); |
|
2328 |
__ jccb(Assembler::equal, L_doLast); |
|
2329 |
||
2330 |
__ aesdec(xmm_result, xmm_temp1); |
|
2331 |
__ aesdec(xmm_result, xmm_temp2); |
|
2332 |
||
2333 |
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); |
|
2334 |
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); |
|
14132 | 2335 |
|
2336 |
__ BIND(L_doLast); |
|
14834 | 2337 |
__ aesdec(xmm_result, xmm_temp1); |
2338 |
__ aesdec(xmm_result, xmm_temp2); |
|
2339 |
||
14132 | 2340 |
// for decryption the aesdeclast operation is always on key+0x00 |
14834 | 2341 |
__ aesdeclast(xmm_result, xmm_temp3); |
14132 | 2342 |
__ movdqu(Address(to, 0), xmm_result); // store the result |
2343 |
__ xorptr(rax, rax); // return 0 |
|
2344 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
|
2345 |
__ ret(0); |
|
2346 |
||
2347 |
return start; |
|
2348 |
} |
|
2349 |
||
2350 |
void handleSOERegisters(bool saving) { |
|
2351 |
const int saveFrameSizeInBytes = 4 * wordSize; |
|
2352 |
const Address saved_rbx (rbp, -3 * wordSize); |
|
2353 |
const Address saved_rsi (rbp, -2 * wordSize); |
|
2354 |
const Address saved_rdi (rbp, -1 * wordSize); |
|
2355 |
||
2356 |
if (saving) { |
|
2357 |
__ subptr(rsp, saveFrameSizeInBytes); |
|
2358 |
__ movptr(saved_rsi, rsi); |
|
2359 |
__ movptr(saved_rdi, rdi); |
|
2360 |
__ movptr(saved_rbx, rbx); |
|
2361 |
} else { |
|
2362 |
// restoring |
|
2363 |
__ movptr(rsi, saved_rsi); |
|
2364 |
__ movptr(rdi, saved_rdi); |
|
2365 |
__ movptr(rbx, saved_rbx); |
|
2366 |
} |
|
2367 |
} |
|
2368 |
||
2369 |
// Arguments: |
|
2370 |
// |
|
2371 |
// Inputs: |
|
2372 |
// c_rarg0 - source byte array address |
|
2373 |
// c_rarg1 - destination byte array address |
|
2374 |
// c_rarg2 - K (key) in little endian int array |
|
2375 |
// c_rarg3 - r vector byte array address |
|
2376 |
// c_rarg4 - input length |
|
2377 |
// |
|
22505 | 2378 |
// Output: |
2379 |
// rax - input length |
|
2380 |
// |
|
14132 | 2381 |
address generate_cipherBlockChaining_encryptAESCrypt() { |
14834 | 2382 |
assert(UseAES, "need AES instructions and misaligned SSE support"); |
14132 | 2383 |
__ align(CodeEntryAlignment); |
2384 |
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); |
|
2385 |
address start = __ pc(); |
|
2386 |
||
2387 |
Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; |
|
2388 |
const Register from = rsi; // source array address |
|
2389 |
const Register to = rdx; // destination array address |
|
2390 |
const Register key = rcx; // key array address |
|
2391 |
const Register rvec = rdi; // r byte array initialized from initvector array address |
|
2392 |
// and left with the results of the last encryption block |
|
2393 |
const Register len_reg = rbx; // src len (must be multiple of blocksize 16) |
|
2394 |
const Register pos = rax; |
|
2395 |
||
2396 |
// xmm register assignments for the loops below |
|
2397 |
const XMMRegister xmm_result = xmm0; |
|
2398 |
const XMMRegister xmm_temp = xmm1; |
|
2399 |
// first 6 keys preloaded into xmm2-xmm7 |
|
2400 |
const int XMM_REG_NUM_KEY_FIRST = 2; |
|
2401 |
const int XMM_REG_NUM_KEY_LAST = 7; |
|
2402 |
const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); |
|
2403 |
||
2404 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
2405 |
handleSOERegisters(true /*saving*/); |
|
2406 |
||
32727
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2407 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2408 |
// context for the registers used, where all instructions below are using 128-bit mode |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2409 |
// On EVEX without VL and BW, these instructions will all be AVX. |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2410 |
if (VM_Version::supports_avx512vlbw()) { |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2411 |
__ movl(rdx, 0xffff); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2412 |
__ kmovdl(k1, rdx); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2413 |
} |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2414 |
|
14132 | 2415 |
// load registers from incoming parameters |
2416 |
const Address from_param(rbp, 8+0); |
|
2417 |
const Address to_param (rbp, 8+4); |
|
2418 |
const Address key_param (rbp, 8+8); |
|
2419 |
const Address rvec_param (rbp, 8+12); |
|
2420 |
const Address len_param (rbp, 8+16); |
|
2421 |
__ movptr(from , from_param); |
|
2422 |
__ movptr(to , to_param); |
|
2423 |
__ movptr(key , key_param); |
|
2424 |
__ movptr(rvec , rvec_param); |
|
2425 |
__ movptr(len_reg , len_param); |
|
2426 |
||
2427 |
const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front |
|
2428 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2429 |
// load up xmm regs 2 thru 7 with keys 0-5 |
|
2430 |
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2431 |
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); |
|
2432 |
offset += 0x10; |
|
2433 |
} |
|
2434 |
||
2435 |
__ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec |
|
2436 |
||
2437 |
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) |
|
2438 |
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
|
2439 |
__ cmpl(rax, 44); |
|
2440 |
__ jcc(Assembler::notEqual, L_key_192_256); |
|
2441 |
||
2442 |
// 128 bit code follows here |
|
14834 | 2443 |
__ movl(pos, 0); |
14132 | 2444 |
__ align(OptoLoopAlignment); |
2445 |
__ BIND(L_loopTop_128); |
|
2446 |
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
|
2447 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2448 |
||
2449 |
__ pxor (xmm_result, xmm_key0); // do the aes rounds |
|
2450 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2451 |
__ aesenc(xmm_result, as_XMMRegister(rnum)); |
|
2452 |
} |
|
2453 |
for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) { |
|
2454 |
aes_enc_key(xmm_result, xmm_temp, key, key_offset); |
|
2455 |
} |
|
2456 |
load_key(xmm_temp, key, 0xa0); |
|
2457 |
__ aesenclast(xmm_result, xmm_temp); |
|
2458 |
||
2459 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2460 |
// no need to store r to memory until we exit |
|
2461 |
__ addptr(pos, AESBlockSize); |
|
2462 |
__ subptr(len_reg, AESBlockSize); |
|
2463 |
__ jcc(Assembler::notEqual, L_loopTop_128); |
|
2464 |
||
2465 |
__ BIND(L_exit); |
|
2466 |
__ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object |
|
2467 |
||
2468 |
handleSOERegisters(false /*restoring*/); |
|
22505 | 2469 |
__ movptr(rax, len_param); // return length |
14132 | 2470 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
2471 |
__ ret(0); |
|
2472 |
||
14834 | 2473 |
__ BIND(L_key_192_256); |
2474 |
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) |
|
14132 | 2475 |
__ cmpl(rax, 52); |
2476 |
__ jcc(Assembler::notEqual, L_key_256); |
|
2477 |
||
2478 |
// 192-bit code follows here (could be changed to use more xmm registers) |
|
14834 | 2479 |
__ movl(pos, 0); |
2480 |
__ align(OptoLoopAlignment); |
|
2481 |
__ BIND(L_loopTop_192); |
|
14132 | 2482 |
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2483 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2484 |
||
2485 |
__ pxor (xmm_result, xmm_key0); // do the aes rounds |
|
2486 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2487 |
__ aesenc(xmm_result, as_XMMRegister(rnum)); |
|
2488 |
} |
|
2489 |
for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) { |
|
2490 |
aes_enc_key(xmm_result, xmm_temp, key, key_offset); |
|
2491 |
} |
|
2492 |
load_key(xmm_temp, key, 0xc0); |
|
2493 |
__ aesenclast(xmm_result, xmm_temp); |
|
2494 |
||
2495 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2496 |
// no need to store r to memory until we exit |
|
2497 |
__ addptr(pos, AESBlockSize); |
|
2498 |
__ subptr(len_reg, AESBlockSize); |
|
2499 |
__ jcc(Assembler::notEqual, L_loopTop_192); |
|
2500 |
__ jmp(L_exit); |
|
2501 |
||
14834 | 2502 |
__ BIND(L_key_256); |
14132 | 2503 |
// 256-bit code follows here (could be changed to use more xmm registers) |
14834 | 2504 |
__ movl(pos, 0); |
2505 |
__ align(OptoLoopAlignment); |
|
2506 |
__ BIND(L_loopTop_256); |
|
14132 | 2507 |
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2508 |
__ pxor (xmm_result, xmm_temp); // xor with the current r vector |
|
2509 |
||
2510 |
__ pxor (xmm_result, xmm_key0); // do the aes rounds |
|
2511 |
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
|
2512 |
__ aesenc(xmm_result, as_XMMRegister(rnum)); |
|
2513 |
} |
|
2514 |
for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) { |
|
2515 |
aes_enc_key(xmm_result, xmm_temp, key, key_offset); |
|
2516 |
} |
|
2517 |
load_key(xmm_temp, key, 0xe0); |
|
2518 |
__ aesenclast(xmm_result, xmm_temp); |
|
2519 |
||
2520 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output |
|
2521 |
// no need to store r to memory until we exit |
|
2522 |
__ addptr(pos, AESBlockSize); |
|
2523 |
__ subptr(len_reg, AESBlockSize); |
|
2524 |
__ jcc(Assembler::notEqual, L_loopTop_256); |
|
2525 |
__ jmp(L_exit); |
|
2526 |
||
2527 |
return start; |
|
2528 |
} |
|
2529 |
||
2530 |
||
2531 |
// CBC AES Decryption. |
|
2532 |
// In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time. |
|
2533 |
// |
|
2534 |
// Arguments: |
|
2535 |
// |
|
2536 |
// Inputs: |
|
2537 |
// c_rarg0 - source byte array address |
|
2538 |
// c_rarg1 - destination byte array address |
|
2539 |
// c_rarg2 - K (key) in little endian int array |
|
2540 |
// c_rarg3 - r vector byte array address |
|
2541 |
// c_rarg4 - input length |
|
2542 |
// |
|
22505 | 2543 |
// Output: |
2544 |
// rax - input length |
|
2545 |
// |
|
14132 | 2546 |
|
36825 | 2547 |
address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { |
14834 | 2548 |
assert(UseAES, "need AES instructions and misaligned SSE support"); |
14132 | 2549 |
__ align(CodeEntryAlignment); |
2550 |
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); |
|
2551 |
address start = __ pc(); |
|
2552 |
||
2553 |
const Register from = rsi; // source array address |
|
2554 |
const Register to = rdx; // destination array address |
|
2555 |
const Register key = rcx; // key array address |
|
2556 |
const Register rvec = rdi; // r byte array initialized from initvector array address |
|
2557 |
// and left with the results of the last encryption block |
|
2558 |
const Register len_reg = rbx; // src len (must be multiple of blocksize 16) |
|
2559 |
const Register pos = rax; |
|
2560 |
||
36825 | 2561 |
const int PARALLEL_FACTOR = 4; |
2562 |
const int ROUNDS[3] = { 10, 12, 14 }; //aes rounds for key128, key192, key256 |
|
2563 |
||
2564 |
Label L_exit; |
|
2565 |
Label L_singleBlock_loopTop[3]; //128, 192, 256 |
|
2566 |
Label L_multiBlock_loopTop[3]; //128, 192, 256 |
|
2567 |
||
2568 |
const XMMRegister xmm_prev_block_cipher = xmm0; // holds cipher of previous block |
|
2569 |
const XMMRegister xmm_key_shuf_mask = xmm1; |
|
2570 |
||
2571 |
const XMMRegister xmm_key_tmp0 = xmm2; |
|
2572 |
const XMMRegister xmm_key_tmp1 = xmm3; |
|
2573 |
||
2574 |
// registers holding the six results in the parallelized loop |
|
2575 |
const XMMRegister xmm_result0 = xmm4; |
|
2576 |
const XMMRegister xmm_result1 = xmm5; |
|
2577 |
const XMMRegister xmm_result2 = xmm6; |
|
2578 |
const XMMRegister xmm_result3 = xmm7; |
|
14132 | 2579 |
|
2580 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
2581 |
handleSOERegisters(true /*saving*/); |
|
2582 |
||
32727
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2583 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2584 |
// context for the registers used, where all instructions below are using 128-bit mode |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2585 |
// On EVEX without VL and BW, these instructions will all be AVX. |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2586 |
if (VM_Version::supports_avx512vlbw()) { |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2587 |
__ movl(rdx, 0xffff); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2588 |
__ kmovdl(k1, rdx); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2589 |
} |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
2590 |
|
14132 | 2591 |
// load registers from incoming parameters |
2592 |
const Address from_param(rbp, 8+0); |
|
2593 |
const Address to_param (rbp, 8+4); |
|
2594 |
const Address key_param (rbp, 8+8); |
|
2595 |
const Address rvec_param (rbp, 8+12); |
|
2596 |
const Address len_param (rbp, 8+16); |
|
36825 | 2597 |
|
14132 | 2598 |
__ movptr(from , from_param); |
2599 |
__ movptr(to , to_param); |
|
2600 |
__ movptr(key , key_param); |
|
2601 |
__ movptr(rvec , rvec_param); |
|
2602 |
__ movptr(len_reg , len_param); |
|
2603 |
||
2604 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
36825 | 2605 |
__ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec |
2606 |
||
2607 |
__ xorptr(pos, pos); |
|
14132 | 2608 |
|
2609 |
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) |
|
36825 | 2610 |
// rvec is reused |
2611 |
__ movl(rvec, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
|
2612 |
__ cmpl(rvec, 52); |
|
2613 |
__ jcc(Assembler::equal, L_multiBlock_loopTop[1]); |
|
2614 |
__ cmpl(rvec, 60); |
|
2615 |
__ jcc(Assembler::equal, L_multiBlock_loopTop[2]); |
|
2616 |
||
2617 |
#define DoFour(opc, src_reg) \ |
|
2618 |
__ opc(xmm_result0, src_reg); \ |
|
2619 |
__ opc(xmm_result1, src_reg); \ |
|
2620 |
__ opc(xmm_result2, src_reg); \ |
|
2621 |
__ opc(xmm_result3, src_reg); \ |
|
2622 |
||
2623 |
for (int k = 0; k < 3; ++k) { |
|
2624 |
__ align(OptoLoopAlignment); |
|
2625 |
__ BIND(L_multiBlock_loopTop[k]); |
|
2626 |
__ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left |
|
2627 |
__ jcc(Assembler::less, L_singleBlock_loopTop[k]); |
|
2628 |
||
2629 |
__ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); // get next 4 blocks into xmmresult registers |
|
2630 |
__ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1 * AESBlockSize)); |
|
2631 |
__ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2 * AESBlockSize)); |
|
2632 |
__ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3 * AESBlockSize)); |
|
2633 |
||
2634 |
// the java expanded key ordering is rotated one position from what we want |
|
2635 |
// so we start from 0x10 here and hit 0x00 last |
|
2636 |
load_key(xmm_key_tmp0, key, 0x10, xmm_key_shuf_mask); |
|
2637 |
DoFour(pxor, xmm_key_tmp0); //xor with first key |
|
2638 |
// do the aes dec rounds |
|
2639 |
for (int rnum = 1; rnum <= ROUNDS[k];) { |
|
2640 |
//load two keys at a time |
|
2641 |
//k1->0x20, ..., k9->0xa0, k10->0x00 |
|
2642 |
load_key(xmm_key_tmp1, key, (rnum + 1) * 0x10, xmm_key_shuf_mask); |
|
2643 |
load_key(xmm_key_tmp0, key, ((rnum + 2) % (ROUNDS[k] + 1)) * 0x10, xmm_key_shuf_mask); // hit 0x00 last! |
|
2644 |
DoFour(aesdec, xmm_key_tmp1); |
|
2645 |
rnum++; |
|
2646 |
if (rnum != ROUNDS[k]) { |
|
2647 |
DoFour(aesdec, xmm_key_tmp0); |
|
2648 |
} |
|
2649 |
else { |
|
2650 |
DoFour(aesdeclast, xmm_key_tmp0); |
|
2651 |
} |
|
2652 |
rnum++; |
|
2653 |
} |
|
2654 |
||
2655 |
// for each result, xor with the r vector of previous cipher block |
|
2656 |
__ pxor(xmm_result0, xmm_prev_block_cipher); |
|
2657 |
__ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0 * AESBlockSize)); |
|
2658 |
__ pxor(xmm_result1, xmm_prev_block_cipher); |
|
2659 |
__ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1 * AESBlockSize)); |
|
2660 |
__ pxor(xmm_result2, xmm_prev_block_cipher); |
|
2661 |
__ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2 * AESBlockSize)); |
|
2662 |
__ pxor(xmm_result3, xmm_prev_block_cipher); |
|
2663 |
__ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3 * AESBlockSize)); // this will carry over to next set of blocks |
|
2664 |
||
2665 |
// store 4 results into the next 64 bytes of output |
|
2666 |
__ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); |
|
2667 |
__ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1); |
|
2668 |
__ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2); |
|
2669 |
__ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3); |
|
2670 |
||
2671 |
__ addptr(pos, 4 * AESBlockSize); |
|
2672 |
__ subptr(len_reg, 4 * AESBlockSize); |
|
2673 |
__ jmp(L_multiBlock_loopTop[k]); |
|
2674 |
||
2675 |
//singleBlock starts here |
|
2676 |
__ align(OptoLoopAlignment); |
|
2677 |
__ BIND(L_singleBlock_loopTop[k]); |
|
2678 |
__ cmpptr(len_reg, 0); // any blocks left? |
|
2679 |
__ jcc(Assembler::equal, L_exit); |
|
2680 |
__ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
|
2681 |
__ movdqa(xmm_result1, xmm_result0); |
|
2682 |
||
2683 |
load_key(xmm_key_tmp0, key, 0x10, xmm_key_shuf_mask); |
|
2684 |
__ pxor(xmm_result0, xmm_key_tmp0); |
|
2685 |
// do the aes dec rounds |
|
2686 |
for (int rnum = 1; rnum < ROUNDS[k]; rnum++) { |
|
2687 |
// the java expanded key ordering is rotated one position from what we want |
|
2688 |
load_key(xmm_key_tmp0, key, (rnum + 1) * 0x10, xmm_key_shuf_mask); |
|
2689 |
__ aesdec(xmm_result0, xmm_key_tmp0); |
|
2690 |
} |
|
2691 |
load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask); |
|
2692 |
__ aesdeclast(xmm_result0, xmm_key_tmp0); |
|
2693 |
__ pxor(xmm_result0, xmm_prev_block_cipher); // xor with the current r vector |
|
2694 |
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result0); // store into the next 16 bytes of output |
|
2695 |
// no need to store r to memory until we exit |
|
2696 |
__ movdqa(xmm_prev_block_cipher, xmm_result1); // set up next r vector with cipher input from this block |
|
2697 |
||
2698 |
__ addptr(pos, AESBlockSize); |
|
2699 |
__ subptr(len_reg, AESBlockSize); |
|
2700 |
__ jmp(L_singleBlock_loopTop[k]); |
|
2701 |
}//for 128/192/256 |
|
14132 | 2702 |
|
2703 |
__ BIND(L_exit); |
|
36825 | 2704 |
__ movptr(rvec, rvec_param); // restore this since reused earlier |
2705 |
__ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object |
|
14132 | 2706 |
handleSOERegisters(false /*restoring*/); |
36825 | 2707 |
__ movptr(rax, len_param); // return length |
2708 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
|
14132 | 2709 |
__ ret(0); |
2710 |
||
2711 |
return start; |
|
2712 |
} |
|
2713 |
||
35154 | 2714 |
// CTR AES crypt. |
2715 |
// In 32-bit stub, parallelize 4 blocks at a time |
|
2716 |
// Arguments: |
|
2717 |
// |
|
2718 |
// Inputs: |
|
2719 |
// c_rarg0 - source byte array address |
|
2720 |
// c_rarg1 - destination byte array address |
|
2721 |
// c_rarg2 - K (key) in little endian int array |
|
2722 |
// c_rarg3 - counter vector byte array address |
|
2723 |
// c_rarg4 - input length |
|
2724 |
// |
|
2725 |
// Output: |
|
2726 |
// rax - input length |
|
2727 |
// |
|
2728 |
address generate_counterMode_AESCrypt_Parallel() { |
|
2729 |
assert(UseAES, "need AES instructions and misaligned SSE support"); |
|
2730 |
__ align(CodeEntryAlignment); |
|
2731 |
StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt"); |
|
2732 |
address start = __ pc(); |
|
2733 |
const Register from = rsi; // source array address |
|
2734 |
const Register to = rdx; // destination array address |
|
2735 |
const Register key = rcx; // key array address |
|
2736 |
const Register counter = rdi; // counter byte array initialized from initvector array address |
|
35537
bed5e2dc57a1
8146581: Minor corrections to the patch submitted for earlier bug id - 8143925
kvn
parents:
35154
diff
changeset
|
2737 |
// and updated with the incremented counter in the end |
35154 | 2738 |
const Register len_reg = rbx; |
2739 |
const Register pos = rax; |
|
2740 |
||
2741 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
|
2742 |
handleSOERegisters(true /*saving*/); // save rbx, rsi, rdi |
|
2743 |
||
2744 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
|
2745 |
// context for the registers used, where all instructions below are using 128-bit mode |
|
2746 |
// On EVEX without VL and BW, these instructions will all be AVX. |
|
2747 |
if (VM_Version::supports_avx512vlbw()) { |
|
2748 |
__ movl(rdx, 0xffff); |
|
2749 |
__ kmovdl(k1, rdx); |
|
2750 |
} |
|
2751 |
||
2752 |
// load registers from incoming parameters |
|
2753 |
const Address from_param(rbp, 8+0); |
|
2754 |
const Address to_param (rbp, 8+4); |
|
2755 |
const Address key_param (rbp, 8+8); |
|
2756 |
const Address rvec_param (rbp, 8+12); |
|
2757 |
const Address len_param (rbp, 8+16); |
|
2758 |
const Address saved_counter_param(rbp, 8 + 20); |
|
2759 |
const Address used_addr_param(rbp, 8 + 24); |
|
2760 |
||
2761 |
__ movptr(from , from_param); |
|
2762 |
__ movptr(to , to_param); |
|
2763 |
__ movptr(len_reg , len_param); |
|
2764 |
||
2765 |
// Use the partially used encrpyted counter from last invocation |
|
2766 |
Label L_exit_preLoop, L_preLoop_start; |
|
2767 |
||
2768 |
// Use the registers 'counter' and 'key' here in this preloop |
|
2769 |
// to hold of last 2 params 'used' and 'saved_encCounter_start' |
|
2770 |
Register used = counter; |
|
2771 |
Register saved_encCounter_start = key; |
|
2772 |
Register used_addr = saved_encCounter_start; |
|
2773 |
||
2774 |
__ movptr(used_addr, used_addr_param); |
|
2775 |
__ movptr(used, Address(used_addr, 0)); |
|
2776 |
__ movptr(saved_encCounter_start, saved_counter_param); |
|
2777 |
||
2778 |
__ BIND(L_preLoop_start); |
|
2779 |
__ cmpptr(used, 16); |
|
2780 |
__ jcc(Assembler::aboveEqual, L_exit_preLoop); |
|
2781 |
__ cmpptr(len_reg, 0); |
|
2782 |
__ jcc(Assembler::lessEqual, L_exit_preLoop); |
|
2783 |
__ movb(rax, Address(saved_encCounter_start, used)); |
|
2784 |
__ xorb(rax, Address(from, 0)); |
|
2785 |
__ movb(Address(to, 0), rax); |
|
2786 |
__ addptr(from, 1); |
|
2787 |
__ addptr(to, 1); |
|
2788 |
__ addptr(used, 1); |
|
2789 |
__ subptr(len_reg, 1); |
|
2790 |
||
2791 |
__ jmp(L_preLoop_start); |
|
2792 |
||
2793 |
__ BIND(L_exit_preLoop); |
|
2794 |
__ movptr(used_addr, used_addr_param); |
|
2795 |
__ movptr(used_addr, used_addr_param); |
|
2796 |
__ movl(Address(used_addr, 0), used); |
|
2797 |
||
2798 |
// load the parameters 'key' and 'counter' |
|
2799 |
__ movptr(key, key_param); |
|
2800 |
__ movptr(counter, rvec_param); |
|
2801 |
||
2802 |
// xmm register assignments for the loops below |
|
2803 |
const XMMRegister xmm_curr_counter = xmm0; |
|
2804 |
const XMMRegister xmm_counter_shuf_mask = xmm1; // need to be reloaded |
|
2805 |
const XMMRegister xmm_key_shuf_mask = xmm2; // need to be reloaded |
|
2806 |
const XMMRegister xmm_key = xmm3; |
|
2807 |
const XMMRegister xmm_result0 = xmm4; |
|
2808 |
const XMMRegister xmm_result1 = xmm5; |
|
2809 |
const XMMRegister xmm_result2 = xmm6; |
|
2810 |
const XMMRegister xmm_result3 = xmm7; |
|
2811 |
const XMMRegister xmm_from0 = xmm1; //reuse XMM register |
|
2812 |
const XMMRegister xmm_from1 = xmm2; |
|
2813 |
const XMMRegister xmm_from2 = xmm3; |
|
2814 |
const XMMRegister xmm_from3 = xmm4; |
|
2815 |
||
2816 |
//for key_128, key_192, key_256 |
|
2817 |
const int rounds[3] = {10, 12, 14}; |
|
2818 |
Label L_singleBlockLoopTop[3]; |
|
2819 |
Label L_multiBlock_loopTop[3]; |
|
2820 |
Label L_key192_top, L_key256_top; |
|
2821 |
Label L_incCounter[3][4]; // 3: different key length, 4: 4 blocks at a time |
|
2822 |
Label L_incCounter_single[3]; //for single block, key128, key192, key256 |
|
2823 |
Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3]; |
|
2824 |
Label L_processTail_extr[3], L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3]; |
|
2825 |
||
2826 |
Label L_exit; |
|
2827 |
const int PARALLEL_FACTOR = 4; //because of the limited register number |
|
2828 |
||
2829 |
// initialize counter with initial counter |
|
2830 |
__ movdqu(xmm_curr_counter, Address(counter, 0x00)); |
|
2831 |
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr())); |
|
2832 |
__ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled for increase |
|
2833 |
||
2834 |
// key length could be only {11, 13, 15} * 4 = {44, 52, 60} |
|
2835 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2836 |
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
|
2837 |
__ cmpl(rax, 52); |
|
2838 |
__ jcc(Assembler::equal, L_key192_top); |
|
2839 |
__ cmpl(rax, 60); |
|
2840 |
__ jcc(Assembler::equal, L_key256_top); |
|
2841 |
||
2842 |
//key128 begins here |
|
2843 |
__ movptr(pos, 0); // init pos before L_multiBlock_loopTop |
|
2844 |
||
2845 |
#define CTR_DoFour(opc, src_reg) \ |
|
2846 |
__ opc(xmm_result0, src_reg); \ |
|
2847 |
__ opc(xmm_result1, src_reg); \ |
|
2848 |
__ opc(xmm_result2, src_reg); \ |
|
2849 |
__ opc(xmm_result3, src_reg); |
|
2850 |
||
2851 |
// k == 0 : generate code for key_128 |
|
2852 |
// k == 1 : generate code for key_192 |
|
2853 |
// k == 2 : generate code for key_256 |
|
2854 |
for (int k = 0; k < 3; ++k) { |
|
2855 |
//multi blocks starts here |
|
2856 |
__ align(OptoLoopAlignment); |
|
2857 |
__ BIND(L_multiBlock_loopTop[k]); |
|
2858 |
__ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left |
|
2859 |
__ jcc(Assembler::less, L_singleBlockLoopTop[k]); |
|
2860 |
||
2861 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2862 |
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr())); |
|
2863 |
||
2864 |
//load, then increase counters |
|
2865 |
CTR_DoFour(movdqa, xmm_curr_counter); |
|
2866 |
__ push(rbx); |
|
2867 |
inc_counter(rbx, xmm_result1, 0x01, L_incCounter[k][0]); |
|
2868 |
inc_counter(rbx, xmm_result2, 0x02, L_incCounter[k][1]); |
|
2869 |
inc_counter(rbx, xmm_result3, 0x03, L_incCounter[k][2]); |
|
2870 |
inc_counter(rbx, xmm_curr_counter, 0x04, L_incCounter[k][3]); |
|
2871 |
__ pop (rbx); |
|
2872 |
||
2873 |
load_key(xmm_key, key, 0x00, xmm_key_shuf_mask); // load Round 0 key. interleaving for better performance |
|
2874 |
||
2875 |
CTR_DoFour(pshufb, xmm_counter_shuf_mask); // after increased, shuffled counters back for PXOR |
|
2876 |
CTR_DoFour(pxor, xmm_key); //PXOR with Round 0 key |
|
2877 |
||
2878 |
for (int i = 1; i < rounds[k]; ++i) { |
|
2879 |
load_key(xmm_key, key, (0x10 * i), xmm_key_shuf_mask); |
|
2880 |
CTR_DoFour(aesenc, xmm_key); |
|
2881 |
} |
|
2882 |
load_key(xmm_key, key, (0x10 * rounds[k]), xmm_key_shuf_mask); |
|
2883 |
CTR_DoFour(aesenclast, xmm_key); |
|
2884 |
||
2885 |
// get next PARALLEL_FACTOR blocks into xmm_from registers |
|
2886 |
__ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); |
|
2887 |
__ movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize)); |
|
2888 |
__ movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize)); |
|
2889 |
||
2890 |
// PXOR with input text |
|
2891 |
__ pxor(xmm_result0, xmm_from0); //result0 is xmm4 |
|
2892 |
__ pxor(xmm_result1, xmm_from1); |
|
2893 |
__ pxor(xmm_result2, xmm_from2); |
|
2894 |
||
2895 |
// store PARALLEL_FACTOR results into the next 64 bytes of output |
|
2896 |
__ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); |
|
2897 |
__ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1); |
|
2898 |
__ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2); |
|
2899 |
||
2900 |
// do it here after xmm_result0 is saved, because xmm_from3 reuse the same register of xmm_result0. |
|
2901 |
__ movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize)); |
|
2902 |
__ pxor(xmm_result3, xmm_from3); |
|
2903 |
__ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3); |
|
2904 |
||
2905 |
__ addptr(pos, PARALLEL_FACTOR * AESBlockSize); // increase the length of crypt text |
|
2906 |
__ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length |
|
2907 |
__ jmp(L_multiBlock_loopTop[k]); |
|
2908 |
||
2909 |
// singleBlock starts here |
|
2910 |
__ align(OptoLoopAlignment); |
|
2911 |
__ BIND(L_singleBlockLoopTop[k]); |
|
2912 |
__ cmpptr(len_reg, 0); |
|
2913 |
__ jcc(Assembler::equal, L_exit); |
|
2914 |
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2915 |
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr())); |
|
2916 |
__ movdqa(xmm_result0, xmm_curr_counter); |
|
2917 |
load_key(xmm_key, key, 0x00, xmm_key_shuf_mask); |
|
2918 |
__ push(rbx);//rbx is used for increasing counter |
|
2919 |
inc_counter(rbx, xmm_curr_counter, 0x01, L_incCounter_single[k]); |
|
2920 |
__ pop (rbx); |
|
2921 |
__ pshufb(xmm_result0, xmm_counter_shuf_mask); |
|
2922 |
__ pxor(xmm_result0, xmm_key); |
|
2923 |
for (int i = 1; i < rounds[k]; i++) { |
|
2924 |
load_key(xmm_key, key, (0x10 * i), xmm_key_shuf_mask); |
|
2925 |
__ aesenc(xmm_result0, xmm_key); |
|
2926 |
} |
|
2927 |
load_key(xmm_key, key, (0x10 * rounds[k]), xmm_key_shuf_mask); |
|
2928 |
__ aesenclast(xmm_result0, xmm_key); |
|
2929 |
__ cmpptr(len_reg, AESBlockSize); |
|
2930 |
__ jcc(Assembler::less, L_processTail_insr[k]); |
|
2931 |
__ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); |
|
2932 |
__ pxor(xmm_result0, xmm_from0); |
|
2933 |
__ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); |
|
2934 |
__ addptr(pos, AESBlockSize); |
|
2935 |
__ subptr(len_reg, AESBlockSize); |
|
2936 |
__ jmp(L_singleBlockLoopTop[k]); |
|
2937 |
||
35537
bed5e2dc57a1
8146581: Minor corrections to the patch submitted for earlier bug id - 8143925
kvn
parents:
35154
diff
changeset
|
2938 |
__ BIND(L_processTail_insr[k]); // Process the tail part of the input array |
bed5e2dc57a1
8146581: Minor corrections to the patch submitted for earlier bug id - 8143925
kvn
parents:
35154
diff
changeset
|
2939 |
__ addptr(pos, len_reg); // 1. Insert bytes from src array into xmm_from0 register |
35154 | 2940 |
__ testptr(len_reg, 8); |
2941 |
__ jcc(Assembler::zero, L_processTail_4_insr[k]); |
|
2942 |
__ subptr(pos,8); |
|
2943 |
__ pinsrd(xmm_from0, Address(from, pos), 0); |
|
2944 |
__ pinsrd(xmm_from0, Address(from, pos, Address::times_1, 4), 1); |
|
2945 |
__ BIND(L_processTail_4_insr[k]); |
|
2946 |
__ testptr(len_reg, 4); |
|
2947 |
__ jcc(Assembler::zero, L_processTail_2_insr[k]); |
|
2948 |
__ subptr(pos,4); |
|
2949 |
__ pslldq(xmm_from0, 4); |
|
2950 |
__ pinsrd(xmm_from0, Address(from, pos), 0); |
|
2951 |
__ BIND(L_processTail_2_insr[k]); |
|
2952 |
__ testptr(len_reg, 2); |
|
2953 |
__ jcc(Assembler::zero, L_processTail_1_insr[k]); |
|
2954 |
__ subptr(pos, 2); |
|
2955 |
__ pslldq(xmm_from0, 2); |
|
2956 |
__ pinsrw(xmm_from0, Address(from, pos), 0); |
|
2957 |
__ BIND(L_processTail_1_insr[k]); |
|
2958 |
__ testptr(len_reg, 1); |
|
2959 |
__ jcc(Assembler::zero, L_processTail_exit_insr[k]); |
|
2960 |
__ subptr(pos, 1); |
|
2961 |
__ pslldq(xmm_from0, 1); |
|
2962 |
__ pinsrb(xmm_from0, Address(from, pos), 0); |
|
2963 |
__ BIND(L_processTail_exit_insr[k]); |
|
2964 |
||
2965 |
__ movptr(saved_encCounter_start, saved_counter_param); |
|
35537
bed5e2dc57a1
8146581: Minor corrections to the patch submitted for earlier bug id - 8143925
kvn
parents:
35154
diff
changeset
|
2966 |
__ movdqu(Address(saved_encCounter_start, 0), xmm_result0); // 2. Perform pxor of the encrypted counter and plaintext Bytes. |
bed5e2dc57a1
8146581: Minor corrections to the patch submitted for earlier bug id - 8143925
kvn
parents:
35154
diff
changeset
|
2967 |
__ pxor(xmm_result0, xmm_from0); // Also the encrypted counter is saved for next invocation. |
35154 | 2968 |
|
2969 |
__ testptr(len_reg, 8); |
|
35537
bed5e2dc57a1
8146581: Minor corrections to the patch submitted for earlier bug id - 8143925
kvn
parents:
35154
diff
changeset
|
2970 |
__ jcc(Assembler::zero, L_processTail_4_extr[k]); // 3. Extract bytes from xmm_result0 into the dest. array |
35154 | 2971 |
__ pextrd(Address(to, pos), xmm_result0, 0); |
2972 |
__ pextrd(Address(to, pos, Address::times_1, 4), xmm_result0, 1); |
|
2973 |
__ psrldq(xmm_result0, 8); |
|
2974 |
__ addptr(pos, 8); |
|
2975 |
__ BIND(L_processTail_4_extr[k]); |
|
2976 |
__ testptr(len_reg, 4); |
|
2977 |
__ jcc(Assembler::zero, L_processTail_2_extr[k]); |
|
2978 |
__ pextrd(Address(to, pos), xmm_result0, 0); |
|
2979 |
__ psrldq(xmm_result0, 4); |
|
2980 |
__ addptr(pos, 4); |
|
2981 |
__ BIND(L_processTail_2_extr[k]); |
|
2982 |
__ testptr(len_reg, 2); |
|
2983 |
__ jcc(Assembler::zero, L_processTail_1_extr[k]); |
|
2984 |
__ pextrb(Address(to, pos), xmm_result0, 0); |
|
2985 |
__ pextrb(Address(to, pos, Address::times_1, 1), xmm_result0, 1); |
|
2986 |
__ psrldq(xmm_result0, 2); |
|
2987 |
__ addptr(pos, 2); |
|
2988 |
__ BIND(L_processTail_1_extr[k]); |
|
2989 |
__ testptr(len_reg, 1); |
|
2990 |
__ jcc(Assembler::zero, L_processTail_exit_extr[k]); |
|
2991 |
__ pextrb(Address(to, pos), xmm_result0, 0); |
|
2992 |
||
2993 |
__ BIND(L_processTail_exit_extr[k]); |
|
2994 |
__ movptr(used_addr, used_addr_param); |
|
2995 |
__ movl(Address(used_addr, 0), len_reg); |
|
2996 |
__ jmp(L_exit); |
|
2997 |
} |
|
2998 |
||
2999 |
__ BIND(L_exit); |
|
3000 |
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr())); |
|
3001 |
__ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back. |
|
3002 |
__ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back |
|
3003 |
handleSOERegisters(false /*restoring*/); |
|
3004 |
__ movptr(rax, len_param); // return length |
|
3005 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
|
3006 |
__ ret(0); |
|
3007 |
||
3008 |
__ BIND (L_key192_top); |
|
3009 |
__ movptr(pos, 0); // init pos before L_multiBlock_loopTop |
|
3010 |
__ jmp(L_multiBlock_loopTop[1]); //key192 |
|
3011 |
||
3012 |
__ BIND (L_key256_top); |
|
3013 |
__ movptr(pos, 0); // init pos before L_multiBlock_loopTop |
|
3014 |
__ jmp(L_multiBlock_loopTop[2]); //key192 |
|
3015 |
||
3016 |
return start; |
|
3017 |
} |
|
3018 |
||
36555 | 3019 |
address generate_upper_word_mask() {
  // Four 32-bit words emitted in this order; only the last one has any bits set.
  const unsigned int mask_words[4] = { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF };

  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
  address entry = __ pc();
  for (int i = 0; i < 4; i++) {
    __ emit_data(mask_words[i], relocInfo::none, 0);
  }
  return entry;
}
|
3029 |
||
3030 |
address generate_shuffle_byte_flip_mask() {
  // Four 32-bit words of the shuffle mask, emitted in this order.
  const unsigned int mask_words[4] = { 0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203 };

  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
  address entry = __ pc();
  for (int i = 0; i < 4; i++) {
    __ emit_data(mask_words[i], relocInfo::none, 0);
  }
  return entry;
}
|
3040 |
||
3041 |
// ofs and limit are use for multi-block byte array. |
|
3042 |
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) |
|
3043 |
address generate_sha1_implCompress(bool multi_block, const char *name) { |
|
3044 |
__ align(CodeEntryAlignment); |
|
3045 |
StubCodeMark mark(this, "StubRoutines", name); |
|
3046 |
address start = __ pc(); |
|
3047 |
||
3048 |
Register buf = rax; |
|
3049 |
Register state = rdx; |
|
3050 |
Register ofs = rcx; |
|
3051 |
Register limit = rdi; |
|
3052 |
||
3053 |
const Address buf_param(rbp, 8 + 0); |
|
3054 |
const Address state_param(rbp, 8 + 4); |
|
3055 |
const Address ofs_param(rbp, 8 + 8); |
|
3056 |
const Address limit_param(rbp, 8 + 12); |
|
3057 |
||
3058 |
const XMMRegister abcd = xmm0; |
|
3059 |
const XMMRegister e0 = xmm1; |
|
3060 |
const XMMRegister e1 = xmm2; |
|
3061 |
const XMMRegister msg0 = xmm3; |
|
3062 |
||
3063 |
const XMMRegister msg1 = xmm4; |
|
3064 |
const XMMRegister msg2 = xmm5; |
|
3065 |
const XMMRegister msg3 = xmm6; |
|
3066 |
const XMMRegister shuf_mask = xmm7; |
|
3067 |
||
3068 |
__ enter(); |
|
3069 |
__ subptr(rsp, 8 * wordSize); |
|
3070 |
if (multi_block) { |
|
3071 |
__ push(limit); |
|
3072 |
} |
|
3073 |
__ movptr(buf, buf_param); |
|
3074 |
__ movptr(state, state_param); |
|
3075 |
if (multi_block) { |
|
3076 |
__ movptr(ofs, ofs_param); |
|
3077 |
__ movptr(limit, limit_param); |
|
3078 |
} |
|
3079 |
||
3080 |
__ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask, |
|
3081 |
buf, state, ofs, limit, rsp, multi_block); |
|
3082 |
||
3083 |
if (multi_block) { |
|
3084 |
__ pop(limit); |
|
3085 |
} |
|
3086 |
__ addptr(rsp, 8 * wordSize); |
|
3087 |
__ leave(); |
|
3088 |
__ ret(0); |
|
3089 |
return start; |
|
3090 |
} |
|
3091 |
||
3092 |
address generate_pshuffle_byte_flip_mask() {
  // Four 32-bit words of the shuffle mask, emitted in this order.
  const unsigned int mask_words[4] = { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };

  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
  address entry = __ pc();
  for (int i = 0; i < 4; i++) {
    __ emit_data(mask_words[i], relocInfo::none, 0);
  }
  return entry;
}
|
3102 |
||
3103 |
// ofs and limit are use for multi-block byte array. |
|
3104 |
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) |
|
3105 |
address generate_sha256_implCompress(bool multi_block, const char *name) { |
|
3106 |
__ align(CodeEntryAlignment); |
|
3107 |
StubCodeMark mark(this, "StubRoutines", name); |
|
3108 |
address start = __ pc(); |
|
3109 |
||
3110 |
Register buf = rbx; |
|
3111 |
Register state = rsi; |
|
3112 |
Register ofs = rdx; |
|
3113 |
Register limit = rcx; |
|
3114 |
||
3115 |
const Address buf_param(rbp, 8 + 0); |
|
3116 |
const Address state_param(rbp, 8 + 4); |
|
3117 |
const Address ofs_param(rbp, 8 + 8); |
|
3118 |
const Address limit_param(rbp, 8 + 12); |
|
3119 |
||
3120 |
const XMMRegister msg = xmm0; |
|
3121 |
const XMMRegister state0 = xmm1; |
|
3122 |
const XMMRegister state1 = xmm2; |
|
3123 |
const XMMRegister msgtmp0 = xmm3; |
|
3124 |
||
3125 |
const XMMRegister msgtmp1 = xmm4; |
|
3126 |
const XMMRegister msgtmp2 = xmm5; |
|
3127 |
const XMMRegister msgtmp3 = xmm6; |
|
3128 |
const XMMRegister msgtmp4 = xmm7; |
|
3129 |
||
3130 |
__ enter(); |
|
3131 |
__ subptr(rsp, 8 * wordSize); |
|
3132 |
handleSOERegisters(true /*saving*/); |
|
3133 |
__ movptr(buf, buf_param); |
|
3134 |
__ movptr(state, state_param); |
|
3135 |
if (multi_block) { |
|
3136 |
__ movptr(ofs, ofs_param); |
|
3137 |
__ movptr(limit, limit_param); |
|
3138 |
} |
|
3139 |
||
3140 |
__ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, |
|
3141 |
buf, state, ofs, limit, rsp, multi_block); |
|
3142 |
||
3143 |
handleSOERegisters(false); |
|
3144 |
__ addptr(rsp, 8 * wordSize); |
|
3145 |
__ leave(); |
|
3146 |
__ ret(0); |
|
3147 |
return start; |
|
3148 |
} |
|
35154 | 3149 |
|
31404
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3150 |
// byte swap x86 long |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3151 |
address generate_ghash_long_swap_mask() { |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3152 |
__ align(CodeEntryAlignment); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3153 |
StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3154 |
address start = __ pc(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3155 |
__ emit_data(0x0b0a0908, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3156 |
__ emit_data(0x0f0e0d0c, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3157 |
__ emit_data(0x03020100, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3158 |
__ emit_data(0x07060504, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3159 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3160 |
return start; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3161 |
} |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3162 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3163 |
// byte swap x86 byte array |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3164 |
address generate_ghash_byte_swap_mask() { |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3165 |
__ align(CodeEntryAlignment); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3166 |
StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3167 |
address start = __ pc(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3168 |
__ emit_data(0x0c0d0e0f, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3169 |
__ emit_data(0x08090a0b, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3170 |
__ emit_data(0x04050607, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3171 |
__ emit_data(0x00010203, relocInfo::none, 0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3172 |
return start; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3173 |
} |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3174 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3175 |
/* Single and multi-block ghash operations */ |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3176 |
address generate_ghash_processBlocks() { |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3177 |
assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support"); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3178 |
__ align(CodeEntryAlignment); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3179 |
Label L_ghash_loop, L_exit; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3180 |
StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3181 |
address start = __ pc(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3182 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3183 |
const Register state = rdi; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3184 |
const Register subkeyH = rsi; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3185 |
const Register data = rdx; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3186 |
const Register blocks = rcx; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3187 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3188 |
const Address state_param(rbp, 8+0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3189 |
const Address subkeyH_param(rbp, 8+4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3190 |
const Address data_param(rbp, 8+8); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3191 |
const Address blocks_param(rbp, 8+12); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3192 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3193 |
const XMMRegister xmm_temp0 = xmm0; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3194 |
const XMMRegister xmm_temp1 = xmm1; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3195 |
const XMMRegister xmm_temp2 = xmm2; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3196 |
const XMMRegister xmm_temp3 = xmm3; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3197 |
const XMMRegister xmm_temp4 = xmm4; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3198 |
const XMMRegister xmm_temp5 = xmm5; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3199 |
const XMMRegister xmm_temp6 = xmm6; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3200 |
const XMMRegister xmm_temp7 = xmm7; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3201 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3202 |
__ enter(); |
31771
c9f593020799
8130341: GHASH 32bit intrinsics has AEADBadTagException
ascarpino
parents:
31404
diff
changeset
|
3203 |
handleSOERegisters(true); // Save registers |
31404
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3204 |
|
32727
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
3205 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
3206 |
// context for the registers used, where all instructions below are using 128-bit mode |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
3207 |
// On EVEX without VL and BW, these instructions will all be AVX. |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
3208 |
if (VM_Version::supports_avx512vlbw()) { |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
3209 |
__ movl(rdx, 0xffff); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
3210 |
__ kmovdl(k1, rdx); |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
3211 |
} |
320855c2baef
8132160: support for AVX 512 call frames and stack management
mcberg
parents:
32596
diff
changeset
|
3212 |
|
31404
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3213 |
__ movptr(state, state_param); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3214 |
__ movptr(subkeyH, subkeyH_param); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3215 |
__ movptr(data, data_param); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3216 |
__ movptr(blocks, blocks_param); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3217 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3218 |
__ movdqu(xmm_temp0, Address(state, 0)); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3219 |
__ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3220 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3221 |
__ movdqu(xmm_temp1, Address(subkeyH, 0)); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3222 |
__ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3223 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3224 |
__ BIND(L_ghash_loop); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3225 |
__ movdqu(xmm_temp2, Address(data, 0)); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3226 |
__ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3227 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3228 |
__ pxor(xmm_temp0, xmm_temp2); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3229 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3230 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3231 |
// Multiply with the hash key |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3232 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3233 |
__ movdqu(xmm_temp3, xmm_temp0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3234 |
__ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3235 |
__ movdqu(xmm_temp4, xmm_temp0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3236 |
__ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3237 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3238 |
__ movdqu(xmm_temp5, xmm_temp0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3239 |
__ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3240 |
__ movdqu(xmm_temp6, xmm_temp0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3241 |
__ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3242 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3243 |
__ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3244 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3245 |
__ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3246 |
__ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3247 |
__ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3248 |
__ pxor(xmm_temp3, xmm_temp5); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3249 |
__ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3250 |
// of the carry-less multiplication of |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3251 |
// xmm0 by xmm1. |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3252 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3253 |
// We shift the result of the multiplication by one bit position |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3254 |
// to the left to cope for the fact that the bits are reversed. |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3255 |
__ movdqu(xmm_temp7, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3256 |
__ movdqu(xmm_temp4, xmm_temp6); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3257 |
__ pslld (xmm_temp3, 1); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3258 |
__ pslld(xmm_temp6, 1); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3259 |
__ psrld(xmm_temp7, 31); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3260 |
__ psrld(xmm_temp4, 31); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3261 |
__ movdqu(xmm_temp5, xmm_temp7); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3262 |
__ pslldq(xmm_temp4, 4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3263 |
__ pslldq(xmm_temp7, 4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3264 |
__ psrldq(xmm_temp5, 12); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3265 |
__ por(xmm_temp3, xmm_temp7); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3266 |
__ por(xmm_temp6, xmm_temp4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3267 |
__ por(xmm_temp6, xmm_temp5); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3268 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3269 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3270 |
// First phase of the reduction |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3271 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3272 |
// Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3273 |
// independently. |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3274 |
__ movdqu(xmm_temp7, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3275 |
__ movdqu(xmm_temp4, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3276 |
__ movdqu(xmm_temp5, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3277 |
__ pslld(xmm_temp7, 31); // packed right shift shifting << 31 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3278 |
__ pslld(xmm_temp4, 30); // packed right shift shifting << 30 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3279 |
__ pslld(xmm_temp5, 25); // packed right shift shifting << 25 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3280 |
__ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3281 |
__ pxor(xmm_temp7, xmm_temp5); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3282 |
__ movdqu(xmm_temp4, xmm_temp7); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3283 |
__ pslldq(xmm_temp7, 12); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3284 |
__ psrldq(xmm_temp4, 4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3285 |
__ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3286 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3287 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3288 |
// Second phase of the reduction |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3289 |
// |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3290 |
// Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3291 |
// shift operations. |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3292 |
__ movdqu(xmm_temp2, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3293 |
__ movdqu(xmm_temp7, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3294 |
__ movdqu(xmm_temp5, xmm_temp3); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3295 |
__ psrld(xmm_temp2, 1); // packed left shifting >> 1 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3296 |
__ psrld(xmm_temp7, 2); // packed left shifting >> 2 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3297 |
__ psrld(xmm_temp5, 7); // packed left shifting >> 7 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3298 |
__ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3299 |
__ pxor(xmm_temp2, xmm_temp5); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3300 |
__ pxor(xmm_temp2, xmm_temp4); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3301 |
__ pxor(xmm_temp3, xmm_temp2); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3302 |
__ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3303 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3304 |
__ decrement(blocks); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3305 |
__ jcc(Assembler::zero, L_exit); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3306 |
__ movdqu(xmm_temp0, xmm_temp6); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3307 |
__ addptr(data, 16); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3308 |
__ jmp(L_ghash_loop); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3309 |
|
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3310 |
__ BIND(L_exit); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3311 |
// Byte swap 16-byte result |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3312 |
__ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3313 |
__ movdqu(Address(state, 0), xmm_temp6); // store the result |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3314 |
|
31771
c9f593020799
8130341: GHASH 32bit intrinsics has AEADBadTagException
ascarpino
parents:
31404
diff
changeset
|
3315 |
handleSOERegisters(false); // restore registers |
31404
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3316 |
__ leave(); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3317 |
__ ret(0); |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3318 |
return start; |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3319 |
} |
63e8fcd70bfc
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
ascarpino
parents:
30624
diff
changeset
|
3320 |
|
18507
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3321 |
/** |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3322 |
* Arguments: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3323 |
* |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3324 |
* Inputs: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3325 |
* rsp(4) - int crc |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3326 |
* rsp(8) - byte* buf |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3327 |
* rsp(12) - int length |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3328 |
* |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3329 |
* Output: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3330 |
* rax - int crc result |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3331 |
*/ |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3332 |
address generate_updateBytesCRC32() { |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3333 |
assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3334 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3335 |
__ align(CodeEntryAlignment); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3336 |
StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3337 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3338 |
address start = __ pc(); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3339 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3340 |
const Register crc = rdx; // crc |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3341 |
const Register buf = rsi; // source java byte array address |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3342 |
const Register len = rcx; // length |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3343 |
const Register table = rdi; // crc_table address (reuse register) |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3344 |
const Register tmp = rbx; |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3345 |
assert_different_registers(crc, buf, len, table, tmp, rax); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3346 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3347 |
BLOCK_COMMENT("Entry:"); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3348 |
__ enter(); // required for proper stackwalking of RuntimeStub frame |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3349 |
__ push(rsi); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3350 |
__ push(rdi); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3351 |
__ push(rbx); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3352 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3353 |
Address crc_arg(rbp, 8 + 0); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3354 |
Address buf_arg(rbp, 8 + 4); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3355 |
Address len_arg(rbp, 8 + 8); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3356 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3357 |
// Load up: |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3358 |
__ movl(crc, crc_arg); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3359 |
__ movptr(buf, buf_arg); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3360 |
__ movl(len, len_arg); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3361 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3362 |
__ kernel_crc32(crc, buf, len, table, tmp); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3363 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3364 |
__ movl(rax, crc); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3365 |
__ pop(rbx); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3366 |
__ pop(rdi); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3367 |
__ pop(rsi); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3368 |
__ leave(); // required for proper stackwalking of RuntimeStub frame |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3369 |
__ ret(0); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3370 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3371 |
return start; |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3372 |
} |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3373 |
|
33066 | 3374 |
/** |
3375 |
* Arguments: |
|
3376 |
* |
|
3377 |
* Inputs: |
|
3378 |
* rsp(4) - int crc |
|
3379 |
* rsp(8) - byte* buf |
|
3380 |
* rsp(12) - int length |
|
3381 |
* rsp(16) - table_start - optional (present only when doing a library_call, |
|
3382 |
* not used by x86 algorithm) |
|
3383 |
* |
|
3384 |
* Output: |
|
3385 |
* rax - int crc result |
|
3386 |
*/ |
|
3387 |
// Emits the updateBytesCRC32C stub and returns its entry address.
//
// The stub reads crc, buf and len from the caller's stack, runs
// crc32c_ipl_alg2_alt2 on them and returns with the crc value in rax.
// rbx, rsi and rdi are saved before and restored after the computation.
address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
  assert(UseCRC32CIntrinsics, "need SSE4_2");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");
  address entry = __ pc();

  // Argument / result registers.
  const Register crc_reg = rax;  // running crc, also the return value
  const Register buf_reg = rcx;  // source java byte array address
  const Register len_reg = rdx;  // length

  // Work registers handed to the CRC32C kernel.
  const Register work_d = rbx;
  const Register work_g = rsi;
  const Register work_h = rdi;

  // Placeholder for the register slots of crc32c_ipl_alg2_alt2 that are
  // not used here; it exists only so that the 32-bit and 64-bit callers
  // can share one signature.
  const Register unused = 0;

  assert_different_registers(crc_reg, buf_reg, len_reg, work_d, work_g, work_h);

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame

  // Incoming arguments start at ESP+4; enter() has just pushed ebp, which
  // moves every one of them a further 4 bytes away from rsp.
  const int first_arg_offset = 4 + 4;
  Address crc_arg(rsp, first_arg_offset + 0);
  Address buf_arg(rsp, first_arg_offset + 4);
  Address len_arg(rsp, first_arg_offset + 8);

  // Load the arguments while rsp still points at the saved ebp.
  __ movl(crc_reg, crc_arg);
  __ movl(buf_reg, buf_arg);
  __ movl(len_reg, len_arg);

  // Preserve the work registers around the kernel.
  __ push(work_d);
  __ push(work_g);
  __ push(work_h);

  __ crc32c_ipl_alg2_alt2(crc_reg, buf_reg, len_reg,
                          work_d, work_g, work_h,
                          unused, unused, unused,
                          xmm0, xmm1, xmm2,
                          is_pclmulqdq_supported);

  // Restore in reverse push order.
  __ pop(work_h);
  __ pop(work_g);
  __ pop(work_d);

  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return entry;
}
|
3430 |
||
33089 | 3431 |
// Emits the libm exp stub: a frame set up with enter()/leave() around the
// code produced by MacroAssembler::fast_exp. Returns the stub's entry address.
address generate_libmExp() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // fast_exp is given xmm0-xmm7 and the general registers rax, rcx, rdx, rbx.
  __ fast_exp(xmm0, xmm1, xmm2, xmm3,
              xmm4, xmm5, xmm6, xmm7,
              rax, rcx, rdx, rbx);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return entry;
}
|
3455 |
||
33465 | 3456 |
// Emits the libm log stub: a frame set up with enter()/leave() around the
// code produced by MacroAssembler::fast_log. Returns the stub's entry address.
address generate_libmLog() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // fast_log is given xmm0-xmm7 and the general registers rax, rcx, rdx, rbx.
  __ fast_log(xmm0, xmm1, xmm2, xmm3,
              xmm4, xmm5, xmm6, xmm7,
              rax, rcx, rdx, rbx);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return entry;
}
|
3480 |
||
38018
1dc6c6f21231
8152907: Update for x86 tan and log10 in the math lib
vdeshpande
parents:
36825
diff
changeset
|
3481 |
// Emits the libm log10 stub: a frame set up with enter()/leave() around the
// code produced by MacroAssembler::fast_log10. Returns the stub's entry address.
address generate_libmLog10() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // fast_log10 is given xmm0-xmm7 and the general registers rax, rcx, rdx, rbx.
  __ fast_log10(xmm0, xmm1, xmm2, xmm3,
                xmm4, xmm5, xmm6, xmm7,
                rax, rcx, rdx, rbx);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return entry;
}
1dc6c6f21231
8152907: Update for x86 tan and log10 in the math lib
vdeshpande
parents:
36825
diff
changeset
|
3505 |
|
35146 | 3506 |
// Emits the libm pow stub: a frame set up with enter()/leave() around the
// code produced by MacroAssembler::fast_pow. Returns the stub's entry address.
address generate_libmPow() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // fast_pow is given xmm0-xmm7 and the general registers rax, rcx, rdx, rbx.
  __ fast_pow(xmm0, xmm1, xmm2, xmm3,
              xmm4, xmm5, xmm6, xmm7,
              rax, rcx, rdx, rbx);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return entry;
}
|
33465 | 3530 |
|
35540
e001ad24dcdb
8143353: update for x86 sin and cos in the math lib
vdeshpande
parents:
35537
diff
changeset
|
3531 |
// Emits the code produced by MacroAssembler::libm_reduce_pi04l and returns
// the address at which it starts. Unlike the other libm stubs, no
// enter()/leave()/ret() is emitted here.
// NOTE(review): presumably the macro emits its own prologue, epilogue and
// return - confirm in MacroAssembler::libm_reduce_pi04l.
address generate_libm_reduce_pi04l() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");
  // All eight general registers are handed to the macro, including rbp and rsp.
  __ libm_reduce_pi04l(rax, rcx, rdx, rbx,
                       rsi, rdi, rbp, rsp);

  return entry;
}
e001ad24dcdb
8143353: update for x86 sin and cos in the math lib
vdeshpande
parents:
35537
diff
changeset
|
3540 |
|
e001ad24dcdb
8143353: update for x86 sin and cos in the math lib
vdeshpande
parents:
35537
diff
changeset
|
3541 |
// Emits the code produced by MacroAssembler::libm_sincos_huge and returns
// the address at which it starts. No enter()/leave()/ret() is emitted here.
// NOTE(review): presumably the macro emits its own prologue, epilogue and
// return - confirm in MacroAssembler::libm_sincos_huge.
address generate_libm_sin_cos_huge() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");
  // xmm0, xmm1 and all eight general registers are handed to the macro.
  __ libm_sincos_huge(xmm0, xmm1,
                      rax, rcx, rdx, rbx,
                      rsi, rdi, rbp, rsp);

  return entry;
}
e001ad24dcdb
8143353: update for x86 sin and cos in the math lib
vdeshpande
parents:
35537
diff
changeset
|
3553 |
|
e001ad24dcdb
8143353: update for x86 sin and cos in the math lib
vdeshpande
parents:
35537
diff
changeset
|
3554 |
// Emits the libm sin stub: a frame set up with enter()/leave() around the
// code produced by MacroAssembler::fast_sin. Returns the stub's entry address.
address generate_libmSin() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // fast_sin is given xmm0-xmm7 and the general registers rax, rbx, rdx
  // (note: a different general-register list than the other fast_* stubs).
  __ fast_sin(xmm0, xmm1, xmm2, xmm3,
              xmm4, xmm5, xmm6, xmm7,
              rax, rbx, rdx);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return entry;
}
e001ad24dcdb
8143353: update for x86 sin and cos in the math lib
vdeshpande
parents:
35537
diff
changeset
|
3576 |
|
e001ad24dcdb
8143353: update for x86 sin and cos in the math lib
vdeshpande
parents:
35537
diff
changeset
|
3577 |
// Emits the libm cos stub: a frame set up with enter()/leave() around the
// code produced by MacroAssembler::fast_cos. Returns the stub's entry address.
address generate_libmCos() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // fast_cos is given xmm0-xmm7 and the general registers rax, rcx, rdx, rbx.
  __ fast_cos(xmm0, xmm1, xmm2, xmm3,
              xmm4, xmm5, xmm6, xmm7,
              rax, rcx, rdx, rbx);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return entry;
}
33089 | 3601 |
|
38018
1dc6c6f21231
8152907: Update for x86 tan and log10 in the math lib
vdeshpande
parents:
36825
diff
changeset
|
3602 |
// Emits the code produced by MacroAssembler::libm_tancot_huge and returns
// the address at which it starts. No enter()/leave()/ret() is emitted here.
// NOTE(review): presumably the macro emits its own prologue, epilogue and
// return - confirm in MacroAssembler::libm_tancot_huge.
address generate_libm_tan_cot_huge() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");
  // xmm0, xmm1 and all eight general registers are handed to the macro.
  __ libm_tancot_huge(xmm0, xmm1,
                      rax, rcx, rdx, rbx,
                      rsi, rdi, rbp, rsp);

  return entry;
}
1dc6c6f21231
8152907: Update for x86 tan and log10 in the math lib
vdeshpande
parents:
36825
diff
changeset
|
3614 |
|
1dc6c6f21231
8152907: Update for x86 tan and log10 in the math lib
vdeshpande
parents:
36825
diff
changeset
|
3615 |
// Emits the libm tan stub: a frame set up with enter()/leave() around the
// code produced by MacroAssembler::fast_tan. Returns the stub's entry address.
address generate_libmTan() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame
  // fast_tan is given xmm0-xmm7 and the general registers rax, rcx, rdx, rbx.
  __ fast_tan(xmm0, xmm1, xmm2, xmm3,
              xmm4, xmm5, xmm6, xmm7,
              rax, rcx, rdx, rbx);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return entry;
}
1dc6c6f21231
8152907: Update for x86 tan and log10 in the math lib
vdeshpande
parents:
36825
diff
changeset
|
3639 |
|
18740 | 3640 |
// Safefetch stubs. |
3641 |
void generate_safefetch(const char* name, int size, address* entry, |
|
3642 |
address* fault_pc, address* continuation_pc) { |
|
3643 |
// safefetch signatures: |
|
3644 |
// int SafeFetch32(int* adr, int errValue); |
|
3645 |
// intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); |
|
3646 |
||
3647 |
StubCodeMark mark(this, "StubRoutines", name); |
|
3648 |
||
3649 |
// Entry point, pc or function descriptor. |
|
3650 |
*entry = __ pc(); |
|
3651 |
||
3652 |
__ movl(rax, Address(rsp, 0x8)); |
|
3653 |
__ movl(rcx, Address(rsp, 0x4)); |
|
3654 |
// Load *adr into eax, may fault. |
|
3655 |
*fault_pc = __ pc(); |
|
3656 |
switch (size) { |
|
3657 |
case 4: |
|
3658 |
// int32_t |
|
3659 |
__ movl(rax, Address(rcx, 0)); |
|
3660 |
break; |
|
3661 |
case 8: |
|
3662 |
// int64_t |
|
3663 |
Unimplemented(); |
|
3664 |
break; |
|
3665 |
default: |
|
3666 |
ShouldNotReachHere(); |
|
3667 |
} |
|
3668 |
||
3669 |
// Return errValue or *adr. |
|
3670 |
*continuation_pc = __ pc(); |
|
3671 |
__ ret(0); |
|
3672 |
} |
|
14132 | 3673 |
|
1 | 3674 |
public: |
3675 |
// Information about frame layout at time of blocking runtime call. |
|
3676 |
// Note that we only have to preserve callee-saved registers since |
|
3677 |
// the compilers are responsible for supplying a continuation point |
|
3678 |
// if they expect all registers to be preserved. |
|
3679 |
// Word-sized slot indices, counted upward from rsp, of the frame that
// generate_throw_exception builds around its runtime call.
enum layout {
  thread_off,   // outgoing java thread argument; last_java_sp
  arg1_off,     // optional first extra argument
  arg2_off,     // optional second extra argument
  rbp_off,      // callee saved register
  ret_pc,       // return address
  framesize     // total number of slots
};
|
3687 |
||
3688 |
private: |
|
3689 |
||
3690 |
#undef __ |
|
3691 |
#define __ masm-> |
|
3692 |
||
3693 |
//------------------------------------------------------------------------------------------------------------------------ |
|
3694 |
// Continuation point for throwing of implicit exceptions that are not handled in |
|
3695 |
// the current activation. Fabricates an exception oop and initiates normal |
|
3696 |
// exception dispatching in this frame. |
|
3697 |
// |
|
3698 |
// Previously the compiler (c2) allowed for callee save registers on Java calls. |
|
3699 |
// This is no longer true after adapter frames were removed but could possibly |
|
3700 |
// be brought back in the future if the interpreter code was reworked and it |
|
3701 |
// was deemed worthwhile. The comment below was left to describe what must |
|
3702 |
// happen here if callee saves were resurrected. As it stands now this stub |
|
3703 |
// could actually be a vanilla BufferBlob and have no oopMap at all. |
|
3704 |
// Since it doesn't make much difference we've chosen to leave it the |
|
3705 |
// way it was in the callee save days and keep the comment. |
|
3706 |
||
3707 |
// If we need to preserve callee-saved values we need a callee-saved oop map and |
|
3708 |
// therefore have to make these stubs into RuntimeStubs rather than BufferBlobs. |
|
3709 |
// If the compiler needs all registers to be preserved between the fault |
|
3710 |
// point and the exception handler then it must assume responsibility for that in |
|
3711 |
// AbstractCompiler::continuation_for_implicit_null_exception or |
|
3712 |
// continuation_for_implicit_division_by_zero_exception. All other implicit |
|
3713 |
// exceptions (e.g., NullPointerException or AbstractMethodError on entry) are |
|
3714 |
// either at call sites or otherwise assume that stack unwinding will be initiated, |
|
3715 |
// so caller saved registers were assumed volatile in the compiler. |
|
3716 |
// Builds a RuntimeStub named `name` that calls `runtime_entry` and then
// jumps to StubRoutines::forward_exception_entry(). Returns the stub's
// entry point.
//
// The current thread is always passed as the first C argument; arg1 and
// arg2, when not noreg, are stored into the following outgoing slots.
// arg2 may only be supplied together with arg1.
address generate_throw_exception(const char* name, address runtime_entry,
                                 Register arg1 = noreg, Register arg2 = noreg) {

  const int code_bytes  = 256;
  const int reloc_bytes = 32;

  CodeBuffer code(name, code_bytes, reloc_bytes);
  OopMapSet* oop_maps = new OopMapSet();
  MacroAssembler* masm = new MacroAssembler(&code);

  address start = __ pc();

  // This is an inlined and slightly modified version of call_VM
  // which has the ability to fetch the return PC out of
  // thread-local storage and also sets up last_Java_sp slightly
  // differently than the real call_VM
  Register thread_reg = rbx;
  __ get_thread(thread_reg);

  __ enter(); // required for proper stackwalking of RuntimeStub frame

  // The return pc and rbp slots are already on the stack; allocate the rest.
  __ subptr(rsp, (framesize-2) * wordSize); // prolog

  // Frame is now completed as far as size and linkage.
  int frame_complete_offset = __ pc() - start;

  // Store the java thread; it becomes the first argument of the C function.
  __ movptr(Address(rsp, thread_off * wordSize), thread_reg);
  if (arg1 != noreg) {
    __ movptr(Address(rsp, arg1_off * wordSize), arg1);
  }
  if (arg2 != noreg) {
    assert(arg1 != noreg, "missing reg arg");
    __ movptr(Address(rsp, arg2_off * wordSize), arg2);
  }

  // Set up last_Java_sp and last_Java_fp
  __ set_last_Java_frame(thread_reg, rsp, rbp, NULL);

  // Call runtime
  BLOCK_COMMENT("call runtime_entry");
  __ call(RuntimeAddress(runtime_entry));

  // Register an oop map at the return address of the call.
  OopMap* frame_map = new OopMap(framesize, 0);
  oop_maps->add_gc_map(__ pc() - start, frame_map);

  // Reload the thread: the pushed argument cannot be reused because
  // arguments may be overwritten by C code generated by an optimizing
  // compiler. (The register value could be used directly if it were
  // callee saved.)
  __ get_thread(thread_reg);

  __ reset_last_Java_frame(thread_reg, true);

  __ leave(); // required for proper stackwalking of RuntimeStub frame

  // A pending exception must be set by now; verify that in debug builds.
#ifdef ASSERT
  Label exception_pending;
  __ cmpptr(Address(thread_reg, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
  __ jcc(Assembler::notEqual, exception_pending);
  __ should_not_reach_here();
  __ bind(exception_pending);
#endif /* ASSERT */
  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

  return RuntimeStub::new_runtime_stub(name, &code, frame_complete_offset,
                                       framesize, oop_maps, false)->entry_point();
}
|
3787 |
||
3788 |
||
3789 |
void create_control_words() {
  // Fills in the constant x87 control words, the standard MXCSR value and
  // the two 80-bit subnormal bias constants kept in StubRoutines.
  // Every assignment below is an independent constant store.

  // --- x87 FPU control words -------------------------------------------
  // Round to nearest, 24-bit mode, exceptions masked
  StubRoutines::_fpu_cntrl_wrd_24 = 0x007F;
  // Round to nearest, 53-bit mode, exceptions masked
  StubRoutines::_fpu_cntrl_wrd_std = 0x027F;
  // Round to nearest, 64-bit mode, exceptions masked
  StubRoutines::_fpu_cntrl_wrd_64 = 0x037F;
  // Round to zero, 53-bit mode, exceptions masked
  // NOTE(review): bits 9:8 of 0x0D7F are 01b, whereas the 53-bit value
  // above (0x027F) has 10b there - confirm the value/comment pairing.
  StubRoutines::_fpu_cntrl_wrd_trunc = 0x0D7F;

  // --- SSE control/status register -------------------------------------
  // Round to nearest, exceptions masked (MXCSR has no precision-control
  // field, so there is no "NN-bit mode" for this value).
  StubRoutines::_mxcsr_std = 0x1F80;

  // --- 80-bit bias constants for strict fp multiply/divide -------------
  // Note: each constant is stored as three words, least significant first;
  // the layout is critical for correct loading by the FPU.

  // Bias: 2^(-15360) == 0x03ff 8000 0000 0000 0000
  StubRoutines::_fpu_subnormal_bias1[0]= 0x00000000;
  StubRoutines::_fpu_subnormal_bias1[1]= 0x80000000;
  StubRoutines::_fpu_subnormal_bias1[2]= 0x03ff;

  // Un-bias: 2^(+15360) == 0x7bff 8000 0000 0000 0000
  StubRoutines::_fpu_subnormal_bias2[0]= 0x00000000;
  StubRoutines::_fpu_subnormal_bias2[1]= 0x80000000;
  StubRoutines::_fpu_subnormal_bias2[2]= 0x7bff;
}
|
3811 |
||
3812 |
//--------------------------------------------------------------------------- |
|
3813 |
// Initialization |
|
3814 |
||
3815 |
void generate_initial() { |
|
3816 |
// Generates all stubs and initializes the entry points |
|
3817 |
||
3818 |
//------------------------------------------------------------------------------------------------------------------------ |
|
3819 |
// entry points that exist in all platforms |
|
3820 |
// Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than |
|
3821 |
// the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp. |
|
3822 |
StubRoutines::_forward_exception_entry = generate_forward_exception(); |
|
3823 |
||
3824 |
StubRoutines::_call_stub_entry = |
|
3825 |
generate_call_stub(StubRoutines::_call_stub_return_address); |
|
3826 |
// is referenced by megamorphic call |
|
3827 |
StubRoutines::_catch_exception_entry = generate_catch_exception(); |
|
3828 |
||
3829 |
// These are currently used by Solaris/Intel |
|
3830 |
StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); |
|
3831 |
||
3832 |
// platform dependent |
|
3833 |
create_control_words(); |
|
3834 |
||
1066 | 3835 |
StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr(); |
3836 |
StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = generate_verify_fpu_cntrl_wrd(); |
|
1 | 3837 |
StubRoutines::_d2i_wrapper = generate_d2i_wrapper(T_INT, |
3838 |
CAST_FROM_FN_PTR(address, SharedRuntime::d2i)); |
|
3839 |
StubRoutines::_d2l_wrapper = generate_d2i_wrapper(T_LONG, |
|
3840 |
CAST_FROM_FN_PTR(address, SharedRuntime::d2l)); |
|
10004
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3841 |
|
190e88f7edd1
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
8498
diff
changeset
|
3842 |
// Build this early so it's available for the interpreter |
35071
a0910b1d3e0d
8046936: JEP 270: Reserved Stack Areas for Critical Sections
fparain
parents:
33465
diff
changeset
|
3843 |
StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", |
a0910b1d3e0d
8046936: JEP 270: Reserved Stack Areas for Critical Sections
fparain
parents:
33465
diff
changeset
|
3844 |
CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); |
a0910b1d3e0d
8046936: JEP 270: Reserved Stack Areas for Critical Sections
fparain
parents:
33465
diff
changeset
|
3845 |
StubRoutines::_throw_delayed_StackOverflowError_entry = generate_throw_exception("delayed StackOverflowError throw_exception", |
a0910b1d3e0d
8046936: JEP 270: Reserved Stack Areas for Critical Sections
fparain
parents:
33465
diff
changeset
|
3846 |
CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError)); |
18507
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3847 |
|
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3848 |
if (UseCRC32Intrinsics) { |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3849 |
// set table address before stub generation which use it |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3850 |
StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table; |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3851 |
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); |
61bfc8995bb3
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
18073
diff
changeset
|
3852 |
} |
33066 | 3853 |
|
3854 |
if (UseCRC32CIntrinsics) { |
|
3855 |
bool supports_clmul = VM_Version::supports_clmul(); |
|
3856 |
StubRoutines::x86::generate_CRC32C_table(supports_clmul); |
|
3857 |
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table; |
|
3858 |
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul); |
|
3859 |
} |
|
42618
08162de8f053
8170430: x86 pow() stub from Intel libm is inconsistent with pow() from fdlib
vdeshpande
parents:
40644
diff
changeset
|
3860 |
if (VM_Version::supports_sse2() && UseLibmIntrinsic && InlineIntrinsics) { |
38699
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3861 |
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) || |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3862 |
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) || |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3863 |
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) { |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3864 |
StubRoutines::x86::_L_2il0floatpacket_0_adr = (address)StubRoutines::x86::_L_2il0floatpacket_0; |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3865 |
StubRoutines::x86::_Pi4Inv_adr = (address)StubRoutines::x86::_Pi4Inv; |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3866 |
StubRoutines::x86::_Pi4x3_adr = (address)StubRoutines::x86::_Pi4x3; |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3867 |
StubRoutines::x86::_Pi4x4_adr = (address)StubRoutines::x86::_Pi4x4; |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3868 |
StubRoutines::x86::_ones_adr = (address)StubRoutines::x86::_ones; |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3869 |
} |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3870 |
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) { |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3871 |
StubRoutines::_dexp = generate_libmExp(); |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3872 |
} |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3873 |
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) { |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3874 |
StubRoutines::_dlog = generate_libmLog(); |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3875 |
} |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3876 |
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog10)) { |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3877 |
StubRoutines::_dlog10 = generate_libmLog10(); |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3878 |
} |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3879 |
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dpow)) { |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3880 |
StubRoutines::_dpow = generate_libmPow(); |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3881 |
} |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3882 |
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) || |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3883 |
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) || |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3884 |
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) { |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3885 |
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l(); |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3886 |
} |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3887 |
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) || |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3888 |
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3889 |
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge(); |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3890 |
} |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3891 |
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3892 |
StubRoutines::_dsin = generate_libmSin(); |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3893 |
} |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3894 |
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3895 |
StubRoutines::_dcos = generate_libmCos(); |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3896 |
} |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3897 |
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) { |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3898 |
StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge(); |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3899 |
StubRoutines::_dtan = generate_libmTan(); |
f8bec5f6b09c
8154473: Update for CompilerDirectives to control stub generation and intrinsics
vdeshpande
parents:
38209
diff
changeset
|
3900 |
} |
33089 | 3901 |
} |
1 | 3902 |
} |
3903 |
||
3904 |
void generate_all() {
  // Generates the remaining stubs and initializes their entry points.
  // Selected by the StubGenerator constructor when its 'all' argument is
  // true. Each optional group below is generated only when its Use*Intrinsics
  // flag is set.

  // These entry points require SharedInfo::stack0 to be set up in non-core builds
  // and need to be relocatable, so they each fabricate a RuntimeStub internally.
  StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
  StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
  StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));

  //------------------------------------------------------------------------------------------------------------------------
  // entry points that are platform specific

  // support for verify_oop (must happen after universe_init)
  StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();

  // arraycopy stubs used by compilers
  generate_arraycopy_stubs();

  // don't bother generating these AES intrinsic stubs unless global flag is set
  if (UseAESIntrinsics) {
    StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others

    StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
    StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
    StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
    StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
  }

  // AES counter-mode stub; the counter shuffle mask is generated first
  if (UseAESCTRIntrinsics) {
    StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
    StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
  }

  // SHA-1 stubs: the two masks first, then the single-block and the
  // multi-block ("MB") variants of the compress stub
  if (UseSHA1Intrinsics) {
    StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
    StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
    StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
    StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
  }
  // SHA-256 stubs: publish the _k256 table address and the byte-flip mask
  // first, then the single-block and the multi-block ("MB") variants
  if (UseSHA256Intrinsics) {
    StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
    StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
    StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
    StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
  }

  // Generate GHASH intrinsics code
  if (UseGHASHIntrinsics) {
    StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
    StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
    StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
  }

  // Safefetch stubs.
  generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                     &StubRoutines::_safefetch32_fault_pc,
                     &StubRoutines::_safefetch32_continuation_pc);
  // The SafeFetchN entry points simply alias the SafeFetch32 stub generated
  // above (presumably because a native word is 32 bits on this platform -
  // TODO confirm).
  StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry;
  StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc;
  StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
}
3965 |
||
3966 |
||
3967 |
public: |
|
3968 |
StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
  // Constructing the generator emits the stubs into 'code':
  // 'all' == false selects the initial set, 'all' == true the remaining set.
  if (!all) {
    generate_initial();
  } else {
    generate_all();
  }
}
|
3975 |
}; // end class declaration |
|
3976 |
||
3977 |
||
3978 |
void StubGenerator_generate(CodeBuffer* code, bool all) {
  // Free-function entry point: all of the work is done by the StubGenerator
  // constructor, so creating a temporary generator is sufficient.
  StubGenerator generator(code, all);
}