/*
 * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2019, SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "registerSaver_s390.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/universe.hpp"
#include "nativeInst_s390.hpp"
#include "oops/instanceOop.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp.

#ifdef PRODUCT
#define __ _masm->
#else
#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
#endif

#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str)
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")

// -----------------------------------------------------------------------
// Stub Code definitions

class StubGenerator: public StubCodeGenerator {
 private:

  //----------------------------------------------------------------------
  // Call stubs are used to call Java from C.

  //
  // Arguments:
  //
  //   R2        - call wrapper address     : address
  //   R3        - result                   : intptr_t*
  //   R4        - result type              : BasicType
  //   R5        - method                   : method
  //   R6        - frame mgr entry point    : address
  //   [SP+160]  - parameter block          : intptr_t*
  //   [SP+172]  - parameter count in words : int
  //   [SP+176]  - thread                   : Thread*
  //
  address generate_call_stub(address& return_address) {
    // Set up a new C frame, copy Java arguments, call frame manager
    // or native_entry, and process result.

    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    Register r_arg_call_wrapper_addr = Z_ARG1;
    Register r_arg_result_addr       = Z_ARG2;
    Register r_arg_result_type       = Z_ARG3;
    Register r_arg_method            = Z_ARG4;
    Register r_arg_entry             = Z_ARG5;

    // offsets to fp
    #define d_arg_thread 176
    #define d_arg_argument_addr 160
    #define d_arg_argument_count 168+4

    Register r_entryframe_fp         = Z_tmp_1;
    Register r_top_of_arguments_addr = Z_ARG4;
    Register r_new_arg_entry         = Z_R14;

    // macros for frame offsets
    #define call_wrapper_address_offset \
               _z_entry_frame_locals_neg(call_wrapper_address)
    #define result_address_offset \
              _z_entry_frame_locals_neg(result_address)
    #define result_type_offset \
              _z_entry_frame_locals_neg(result_type)
    #define arguments_tos_address_offset \
              _z_entry_frame_locals_neg(arguments_tos_address)

    {
      //
      // STACK on entry to call_stub:
      //
      //     F1      [C_FRAME]
      //             ...
      //

      Register r_argument_addr          = Z_tmp_3;
      Register r_argumentcopy_addr      = Z_tmp_4;
      Register r_argument_size_in_bytes = Z_ARG5;
      Register r_frame_size             = Z_R1;

      Label arguments_copied;

      // Save non-volatile registers to ABI of caller frame.
      BLOCK_COMMENT("save registers, push frame {");
      __ z_stmg(Z_R6, Z_R14, 16, Z_SP);
      __ z_std(Z_F8, 96, Z_SP);
      __ z_std(Z_F9, 104, Z_SP);
      __ z_std(Z_F10, 112, Z_SP);
      __ z_std(Z_F11, 120, Z_SP);
      __ z_std(Z_F12, 128, Z_SP);
      __ z_std(Z_F13, 136, Z_SP);
      __ z_std(Z_F14, 144, Z_SP);
      __ z_std(Z_F15, 152, Z_SP);

      //
      // Push ENTRY_FRAME including arguments:
      //
      //     F0      [TOP_IJAVA_FRAME_ABI]
      //             [outgoing Java arguments]
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //

      // Calculate new frame size and push frame.
      #define abi_plus_locals_size \
                (frame::z_top_ijava_frame_abi_size + frame::z_entry_frame_locals_size)
      if (abi_plus_locals_size % BytesPerWord == 0) {
        // Preload constant part of frame size.
        __ load_const_optimized(r_frame_size, -abi_plus_locals_size/BytesPerWord);
        // Keep copy of our frame pointer (caller's SP).
        __ z_lgr(r_entryframe_fp, Z_SP);
        // Add space required by arguments to frame size.
        __ z_slgf(r_frame_size, d_arg_argument_count, Z_R0, Z_SP);
        // Move Z_ARG5 early, it will be used as a local.
        __ z_lgr(r_new_arg_entry, r_arg_entry);
        // Convert frame size from words to bytes.
        __ z_sllg(r_frame_size, r_frame_size, LogBytesPerWord);
        __ push_frame(r_frame_size, r_entryframe_fp,
                      false/*don't copy SP*/, true /*frame size sign inverted*/);
      } else {
        guarantee(false, "frame sizes should be multiples of word size (BytesPerWord)");
      }
      BLOCK_COMMENT("} save, push");

      // Load argument registers for call.
      BLOCK_COMMENT("prepare/copy arguments {");
      __ z_lgr(Z_method, r_arg_method);
      __ z_lg(Z_thread, d_arg_thread, r_entryframe_fp);

      // Calculate top_of_arguments_addr which will be tos (not prepushed) later.
      // Simply use SP + frame::top_ijava_frame_size.
      __ add2reg(r_top_of_arguments_addr,
                 frame::z_top_ijava_frame_abi_size - BytesPerWord, Z_SP);

      // Initialize call_stub locals (step 1).
      if ((call_wrapper_address_offset + BytesPerWord == result_address_offset) &&
          (result_address_offset + BytesPerWord == result_type_offset) &&
          (result_type_offset + BytesPerWord == arguments_tos_address_offset)) {

        __ z_stmg(r_arg_call_wrapper_addr, r_top_of_arguments_addr,
                  call_wrapper_address_offset, r_entryframe_fp);
      } else {
        __ z_stg(r_arg_call_wrapper_addr,
                 call_wrapper_address_offset, r_entryframe_fp);
        __ z_stg(r_arg_result_addr,
                 result_address_offset, r_entryframe_fp);
        __ z_stg(r_arg_result_type,
                 result_type_offset, r_entryframe_fp);
        __ z_stg(r_top_of_arguments_addr,
                 arguments_tos_address_offset, r_entryframe_fp);
      }

      // Copy Java arguments.

      // Any arguments to copy?
      __ load_and_test_int2long(Z_R1, Address(r_entryframe_fp, d_arg_argument_count));
      __ z_bre(arguments_copied);

      // Prepare loop and copy arguments in reverse order.
      {
        // Calculate argument size in bytes.
        __ z_sllg(r_argument_size_in_bytes, Z_R1, LogBytesPerWord);

        // Get addr of first incoming Java argument.
        __ z_lg(r_argument_addr, d_arg_argument_addr, r_entryframe_fp);

        // Let r_argumentcopy_addr point to last outgoing Java argument.
        __ add2reg(r_argumentcopy_addr, BytesPerWord, r_top_of_arguments_addr); // = Z_SP+160 effectively.

        // Let r_argument_addr point to last incoming Java argument.
        __ add2reg_with_index(r_argument_addr, -BytesPerWord,
                              r_argument_size_in_bytes, r_argument_addr);

        // Now loop while Z_R1 > 0 and copy arguments.
        {
          Label next_argument;
          __ bind(next_argument);
          // Mem-mem move.
          __ z_mvc(0, BytesPerWord-1, r_argumentcopy_addr, 0, r_argument_addr);
          __ add2reg(r_argument_addr, -BytesPerWord);
          __ add2reg(r_argumentcopy_addr, BytesPerWord);
          __ z_brct(Z_R1, next_argument);
        }
      } // End of argument copy loop.

      __ bind(arguments_copied);
    }
    BLOCK_COMMENT("} arguments");

    BLOCK_COMMENT("call {");
    {
      // Call frame manager or native entry.

      //
      // Register state on entry to frame manager / native entry:
      //
      //   Z_ARG1 = r_top_of_arguments_addr - intptr_t *sender tos (prepushed)
      //            Lesp = (SP) + copied_arguments_offset - 8
      //   Z_method                         - method
      //   Z_thread                         - JavaThread*
      //

      // Here, the usual SP is the initial_caller_sp.
      __ z_lgr(Z_R10, Z_SP);

      // Z_esp points to the slot below the last argument.
      __ z_lgr(Z_esp, r_top_of_arguments_addr);

      //
      // Stack on entry to frame manager / native entry:
      //
      //     F0      [TOP_IJAVA_FRAME_ABI]
      //             [outgoing Java arguments]
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //

      // Do a light-weight C-call here, r_new_arg_entry holds the address
      // of the interpreter entry point (frame manager or native entry)
      // and save runtime-value of return_pc in return_address
      // (call by reference argument).
      return_address = __ call_stub(r_new_arg_entry);
    }
    BLOCK_COMMENT("} call");

    {
      BLOCK_COMMENT("restore registers {");
      // Returned from frame manager or native entry.
      // Now pop frame, process result, and return to caller.

      //
      // Stack on exit from frame manager / native entry:
      //
      //     F0      [ABI]
      //             ...
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //
      // Just pop the topmost frame ...
      //

      // Restore frame pointer.
      __ z_lg(r_entryframe_fp, _z_abi(callers_sp), Z_SP);
      // Pop frame. Done here to minimize stalls.
      __ pop_frame();

      // Reload some volatile registers which we've spilled before the call
      // to frame manager / native entry.
      // Access all locals via frame pointer, because we know nothing about
      // the topmost frame's size.
      __ z_lg(r_arg_result_addr, result_address_offset, r_entryframe_fp);
      __ z_lg(r_arg_result_type, result_type_offset, r_entryframe_fp);

      // Restore non-volatiles.
      __ z_lmg(Z_R6, Z_R14, 16, Z_SP);
      __ z_ld(Z_F8, 96, Z_SP);
      __ z_ld(Z_F9, 104, Z_SP);
      __ z_ld(Z_F10, 112, Z_SP);
      __ z_ld(Z_F11, 120, Z_SP);
      __ z_ld(Z_F12, 128, Z_SP);
      __ z_ld(Z_F13, 136, Z_SP);
      __ z_ld(Z_F14, 144, Z_SP);
      __ z_ld(Z_F15, 152, Z_SP);
      BLOCK_COMMENT("} restore");

      //
      // Stack on exit from call_stub:
      //
      //     0       [C_FRAME]
      //             ...
      //
      // No call_stub frames left.
      //

      // All non-volatiles have been restored at this point!!

      //------------------------------------------------------------------------
      // The following code makes some assumptions on the T_<type> enum values.
      // The enum is defined in globalDefinitions.hpp.
      // The validity of the assumptions is tested as far as possible.
      // The assigned values should not be shuffled:
      //   T_BOOLEAN == 4   - lowest used enum value
      //   T_NARROWOOP == 16 - largest used enum value
      //------------------------------------------------------------------------
      BLOCK_COMMENT("process result {");
      Label firstHandler;
      int   handlerLen = 8;
#ifdef ASSERT
      char  assertMsg[] = "check BasicType definition in globalDefinitions.hpp";
      __ z_chi(r_arg_result_type, T_BOOLEAN);
      __ asm_assert_low(assertMsg, 0x0234);
      __ z_chi(r_arg_result_type, T_NARROWOOP);
      __ asm_assert_high(assertMsg, 0x0235);
#endif
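      // Dispatch on the result type via a jump table: each handler below is
      // aligned to handlerLen (8) bytes, so the branch target is
      // firstHandler + (result_type - T_BOOLEAN) * handlerLen.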
      __ add2reg(r_arg_result_type, -T_BOOLEAN);          // Remove offset.
      __ z_larl(Z_R1, firstHandler);                      // location of first handler
      __ z_sllg(r_arg_result_type, r_arg_result_type, 3); // Each handler is 8 bytes long.
      __ z_bc(MacroAssembler::bcondAlways, 0, r_arg_result_type, Z_R1);

      __ align(handlerLen);
      __ bind(firstHandler);
      // T_BOOLEAN:
        guarantee(T_BOOLEAN == 4, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_CHAR:
        guarantee(T_CHAR == T_BOOLEAN+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_FLOAT:
        guarantee(T_FLOAT == T_CHAR+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_ste(Z_FRET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_DOUBLE:
        guarantee(T_DOUBLE == T_FLOAT+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_std(Z_FRET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_BYTE:
        guarantee(T_BYTE == T_DOUBLE+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_SHORT:
        guarantee(T_SHORT == T_BYTE+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_INT:
        guarantee(T_INT == T_SHORT+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_LONG:
        guarantee(T_LONG == T_INT+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_OBJECT:
        guarantee(T_OBJECT == T_LONG+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_ARRAY:
        guarantee(T_ARRAY == T_OBJECT+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_VOID:
        guarantee(T_VOID == T_ARRAY+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_ADDRESS:
        guarantee(T_ADDRESS == T_VOID+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_stg(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      // T_NARROWOOP:
        guarantee(T_NARROWOOP == T_ADDRESS+1, "check BasicType definition in globalDefinitions.hpp");
        __ z_st(Z_RET, 0, r_arg_result_addr);
        __ z_br(Z_R14); // Return to caller.
        __ align(handlerLen);
      BLOCK_COMMENT("} process result");
    }
    return start;
  }

  // Return point for a Java call if there's an exception thrown in
  // Java code. The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");

    address start = __ pc();

    //
    // Registers alive
    //
    //   Z_thread
    //   Z_ARG1 - address of pending exception
    //   Z_ARG2 - return address in call stub
    //

    const Register exception_file = Z_R0;
    const Register exception_line = Z_R1;

    __ load_const_optimized(exception_file, (void*)__FILE__);
    __ load_const_optimized(exception_line, (void*)__LINE__);

    __ z_stg(Z_ARG1, thread_(pending_exception));
    // Store into `char *'.
    __ z_stg(exception_file, thread_(exception_file));
    // Store into `int'.
    __ z_st(exception_line, thread_(exception_line));

    // Complete return to VM.
    assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");

    // Continue in call stub.
    __ z_br(Z_ARG2);

    return start;
  }

  // Continuation point for runtime calls returning with a pending
  // exception. The pending exception check happened in the runtime
  // or native call stub. The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Read:
  //   Z_R14: pc the runtime library callee wants to return to.
  //   Since the exception occurred in the callee, the return pc
  //   from the point of view of Java is the exception pc.
  //
  // Invalidate:
  //   Volatile registers (except below).
  //
  // Update:
  //   Z_ARG1: exception
  //   (Z_R14 is unchanged and is live out).
  //
  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward_exception");
    address start = __ pc();

    #define pending_exception_offset in_bytes(Thread::pending_exception_offset())
#ifdef ASSERT
    // Get pending exception oop.
    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);

    // Make sure that this code is only executed if there is a pending exception.
    {
      Label L;
      __ z_ltgr(Z_ARG1, Z_ARG1);
      __ z_brne(L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }

    __ verify_oop(Z_ARG1, "StubRoutines::forward exception: not an oop");
#endif

    __ z_lgr(Z_ARG2, Z_R14); // Copy exception pc into Z_ARG2.
    __ save_return_pc();
    __ push_frame_abi160(0);
    // Find exception handler.
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
                    Z_thread,
                    Z_ARG2);
    // Copy handler's address.
    __ z_lgr(Z_R1, Z_RET);
    __ pop_frame();
    __ restore_return_pc();

    // Set up the arguments for the exception handler:
    // - Z_ARG1: exception oop
    // - Z_ARG2: exception pc

    // Load pending exception oop.
    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);

    // The exception pc is the return address in the caller,
    // must load it into Z_ARG2.
    __ z_lgr(Z_ARG2, Z_R14);

#ifdef ASSERT
    // Make sure exception is set.
    { Label L;
      __ z_ltgr(Z_ARG1, Z_ARG1);
      __ z_brne(L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // Clear the pending exception.
    __ clear_mem(Address(Z_thread, pending_exception_offset), sizeof(void *));
    // Jump to exception handler.
    __ z_br(Z_R1 /*handler address*/);

    return start;

    #undef pending_exception_offset
  }

  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Only callee-saved registers are preserved (through the
  // normal RegisterMap handling). If the compiler
  // needs all registers to be preserved between the fault point and
  // the exception handler then it must assume responsibility for that
  // in AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller saved registers were assumed volatile in the compiler.

  // Note that we generate only this stub into a RuntimeStub, because
  // it needs to be properly traversed and ignored during GC, so we
  // change the meaning of the "__" macro within this method.

  // Note: the routine set_pc_not_at_call_for_caller in
  // SharedRuntime.cpp requires that this code be generated into a
  // RuntimeStub.
#undef __
#define __ masm->

  address generate_throw_exception(const char* name, address runtime_entry,
                                   bool restore_saved_exception_pc,
                                   Register arg1 = noreg, Register arg2 = noreg) {
    assert_different_registers(arg1, Z_R0_scratch); // would be destroyed by push_frame()
    assert_different_registers(arg2, Z_R0_scratch); // would be destroyed by push_frame()

    int insts_size = 256;
    int locs_size  = 0;
    CodeBuffer      code(name, insts_size, locs_size);
    MacroAssembler* masm = new MacroAssembler(&code);
    int framesize_in_bytes;
    address start = __ pc();

    __ save_return_pc();
    framesize_in_bytes = __ push_frame_abi160(0);

    address frame_complete_pc = __ pc();
    if (restore_saved_exception_pc) {
      __ unimplemented("StubGenerator::throw_exception", 74);
    }

    // Note that we always have a runtime stub frame on the top of stack at this point.
    __ get_PC(Z_R1);
    __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1);

    // Do the call.
    BLOCK_COMMENT("call runtime_entry");
    __ call_VM_leaf(runtime_entry, Z_thread, arg1, arg2);

    __ reset_last_Java_frame();

#ifdef ASSERT
    // Make sure that this code is only executed if there is a pending exception.
    { Label L;
      __ z_lg(Z_R0,
              in_bytes(Thread::pending_exception_offset()),
              Z_thread);
      __ z_ltgr(Z_R0, Z_R0);
      __ z_brne(L);
      __ stop("StubRoutines::throw_exception: no pending exception");
      __ bind(L);
    }
#endif

    __ pop_frame();
    __ restore_return_pc();

    __ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
    __ z_br(Z_R1);

    RuntimeStub* stub =
      RuntimeStub::new_runtime_stub(name, &code,
                                    frame_complete_pc - start,
                                    framesize_in_bytes/wordSize,
                                    NULL /*oop_maps*/, false);

    return stub->entry_point();
  }

#undef __
#ifdef PRODUCT
#define __ _masm->
#else
#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
#endif

  // Support for uint StubRoutine::zarch::partial_subtype_check(Klass
  // sub, Klass super);
  //
  // Arguments:
  //   ret  : Z_RET,  returned
  //   sub  : Z_ARG2, argument, not changed
  //   super: Z_ARG3, argument, not changed
  //
  //   raddr: Z_R14, blown by call
  //
  address generate_partial_subtype_check() {
    StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
    Label miss;

    address start = __ pc();

    const Register Rsubklass   = Z_ARG2; // subklass
    const Register Rsuperklass = Z_ARG3; // superklass

    // No args, but tmp registers that are killed.
    const Register Rlength     = Z_ARG4; // cache array length
    const Register Rarray_ptr  = Z_ARG5; // Current value from cache array.

    if (UseCompressedOops) {
      assert(Universe::heap() != NULL, "java heap must be initialized to generate partial_subtype_check stub");
    }

    // Always take the slow path (see SPARC).
    __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass,
                                     Rarray_ptr, Rlength, NULL, &miss);

    // Match falls through here.
    __ clear_reg(Z_RET);               // Zero indicates a match. Set EQ flag in CC.
    __ z_br(Z_R14);

    __ BIND(miss);
    __ load_const_optimized(Z_RET, 1); // One indicates a miss.
    __ z_ltgr(Z_RET, Z_RET);           // Set NE flag in CR.
    __ z_br(Z_R14);

    return start;
  }

  // Return address of code to be called from code generated by
  // MacroAssembler::verify_oop.
  //
  // Don't generate, rather use C++ code.
  address generate_verify_oop_subroutine() {
    // Don't generate a StubCodeMark, because no code is generated!
    // Generating the mark triggers notifying the oprofile jvmti agent
    // about the dynamic code generation, but the stub without
    // code (code_size == 0) confuses opjitconv.
    // StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");

    address start = 0;
    return start;
  }

  // This is to test that the count register contains a positive int value.
  // Required because C2 does not respect int to long conversion for stub calls.
  void assert_positive_int(Register count) {
#ifdef ASSERT
    __ z_srag(Z_R0, count, 31); // Just leave the sign (must be zero) in Z_R0.
    __ asm_assert_eq("missing zero extend", 0xAFFE);
#endif
  }

  // Generate overlap test for array copy stubs.
  // If no actual overlap is detected, control is transferred to the
  // "normal" copy stub (entry address passed in disjoint_copy_target).
  // Otherwise, execution continues with the code generated by the
  // caller of array_overlap_test.
  //
  // Input:
  //   Z_ARG1 - from
  //   Z_ARG2 - to
  //   Z_ARG3 - element count
  void array_overlap_test(address disjoint_copy_target, int log2_elem_size) {
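    // Overlap is destructive only if the target range starts strictly inside
    // the source range, i.e. from < to < from + count*elem_size. Both
    // compares below treat the addresses as unsigned 64-bit values.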
    __ MacroAssembler::compare_and_branch_optimized(Z_ARG2, Z_ARG1, Assembler::bcondNotHigh,
                                                    disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);

    Register index = Z_ARG3;
    if (log2_elem_size > 0) {
      __ z_sllg(Z_R1, Z_ARG3, log2_elem_size); // byte count
      index = Z_R1;
    }
    __ add2reg_with_index(Z_R1, 0, index, Z_ARG1); // First byte after "from" range.

    __ MacroAssembler::compare_and_branch_optimized(Z_R1, Z_ARG2, Assembler::bcondNotHigh,
                                                    disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);

    // Destructive overlap: let caller generate code for that.
  }

  // Generate stub for disjoint array copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //   from:  Z_ARG1
  //   to:    Z_ARG2
  //   count: Z_ARG3 treated as signed
  void generate_disjoint_copy(bool aligned, int element_size,
                              bool branchToEnd,
                              bool restoreArgs) {
    // This is the zarch specific stub generator for general array copy tasks.
    // It has the following prereqs and features:
    //
    // - No destructive overlap allowed (else unpredictable results).
    // - Destructive overlap does not exist if the leftmost byte of the target
    //   does not coincide with any of the source bytes (except the leftmost).
    //
    // Register usage upon entry:
    //   Z_ARG1 == Z_R2 :   address of source array
    //   Z_ARG2 == Z_R3 :   address of target array
    //   Z_ARG3 == Z_R4 :   length of operands (# of elements on entry)
    //
    // Register usage within the generator:
    // - Z_R0 and Z_R1 are KILLed by the stub routine (target addr/len).
    //   Used as pair register operand in complex moves, scratch registers anyway.
    // - Z_R5 is KILLed by the stub routine (source register pair addr/len) (even/odd reg).
    //   Same as R0/R1, but no scratch register.
    // - Z_ARG1, Z_ARG2, Z_ARG3 are USEd but preserved by the stub routine,
    //   but they might get temporarily overwritten.

    Register save_reg = Z_ARG4; // (= Z_R5), holds original target operand address for restore.

    {
      Register  llen_reg = Z_R1;   // Holds left  operand len (odd reg).
      Register laddr_reg = Z_R0;   // Holds left  operand addr (even reg), overlaps with data_reg.
      Register  rlen_reg = Z_R5;   // Holds right operand len (odd reg), overlaps with save_reg.
      Register raddr_reg = Z_R4;   // Holds right operand addr (even reg), overlaps with len_reg.

      Register  data_reg = Z_R0;   // Holds copied data chunk in alignment process and copy loop.
      Register   len_reg = Z_ARG3; // Holds operand len (#elements at entry, #bytes shortly after).
      Register   dst_reg = Z_ARG2; // Holds left (target)  operand addr.
      Register   src_reg = Z_ARG1; // Holds right (source) operand addr.

      Label     doMVCLOOP, doMVCLOOPcount, doMVCLOOPiterate;
      Label     doMVCUnrolled;
      NearLabel doMVC, doMVCgeneral, done;
      Label     MVC_template;
      address   pcMVCblock_b, pcMVCblock_e;

      bool usedMVCLE       = true;
      bool usedMVCLOOP     = true;
      bool usedMVCUnrolled = false;
      bool usedMVC         = false;
      bool usedMVCgeneral  = false;

      int      stride;
      Register stride_reg;
      Register ix_reg;

      assert((element_size<=256) && (256%element_size == 0), "element size must be <= 256, power of 2");
      unsigned int log2_size = exact_log2(element_size);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("ARRAYCOPY DISJOINT byte  {"); break;
        case 2:  BLOCK_COMMENT("ARRAYCOPY DISJOINT short {"); break;
        case 4:  BLOCK_COMMENT("ARRAYCOPY DISJOINT int   {"); break;
        case 8:  BLOCK_COMMENT("ARRAYCOPY DISJOINT long  {"); break;
        default: BLOCK_COMMENT("ARRAYCOPY DISJOINT       {"); break;
      }

      assert_positive_int(len_reg);

      BLOCK_COMMENT("preparation {");

      // No copying if len <= 0.
      if (branchToEnd) {
        __ compare64_and_branch(len_reg, (intptr_t) 0, Assembler::bcondNotHigh, done);
      } else {
        if (VM_Version::has_CompareBranch()) {
          __ z_cgib(len_reg, 0, Assembler::bcondNotHigh, 0, Z_R14);
        } else {
          __ z_ltgr(len_reg, len_reg);
          __ z_bcr(Assembler::bcondNotPositive, Z_R14);
        }
      }

      // Prefetch just one cache line. Speculative opt for short arrays.
      // Do not use Z_R1 in prefetch. Is undefined here.
      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
      }

      BLOCK_COMMENT("} preparation");

      // Save args only if really needed.
      // Keep len test local to branch. Is generated only once.

      BLOCK_COMMENT("mode selection {");

      // Special handling for arrays with only a few elements.
      // Nothing fancy: just an executed MVC.
      if (log2_size > 0) {
        __ z_sllg(Z_R1, len_reg, log2_size); // Remember #bytes in Z_R1.
      }
      if (element_size != 8) {
        __ z_cghi(len_reg, 256/element_size);
        __ z_brnh(doMVC);
        usedMVC = true;
      }
      if (element_size == 8) { // Long and oop arrays are always aligned.
        __ z_cghi(len_reg, 256/element_size);
        __ z_brnh(doMVCUnrolled);
        usedMVCUnrolled = true;
      }

      // Prefetch another cache line. We, for sure, have more than one line to copy.
      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 256, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 256, Z_R0, dst_reg); // Store access.
      }

      if (restoreArgs) {
        // Remember entry value of ARG2 to restore all arguments later from that knowledge.
        __ z_lgr(save_reg, dst_reg);
      }

      __ z_cghi(len_reg, 4096/element_size);
      if (log2_size == 0) {
        __ z_lgr(Z_R1, len_reg); // Init Z_R1 with #bytes
      }
      __ z_brnh(doMVCLOOP);

      // Fall through to MVCLE case.

      BLOCK_COMMENT("} mode selection");

      // MVCLE: for long arrays
      //   DW aligned: Best performance for sizes > 4kBytes.
      //   unaligned:  Least complex for sizes > 256 bytes.
      if (usedMVCLE) {
        BLOCK_COMMENT("mode MVCLE {");

        // Setup registers for mvcle.
        //__ z_lgr(llen_reg, len_reg);// r1 <- r4  #bytes already in Z_R1, aka llen_reg.
        __ z_lgr(laddr_reg, dst_reg); // r0 <- r3
        __ z_lgr(raddr_reg, src_reg); // r4 <- r2
        __ z_lgr(rlen_reg, llen_reg); // r5 <- r1

        __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb0);    // special: bypass cache
        // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb8); // special: Hold data in cache.
        // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0);

        if (restoreArgs) {
          // MVCLE updates the source (Z_R4,Z_R5) and target (Z_R0,Z_R1) register pairs.
          // Dst_reg (Z_ARG2) and src_reg (Z_ARG1) are left untouched. No restore required.
          // Len_reg (Z_ARG3) is destroyed and must be restored.
          __ z_slgr(laddr_reg, dst_reg); // copied #bytes
          if (log2_size > 0) {
            __ z_srag(Z_ARG3, laddr_reg, log2_size); // Convert back to #elements.
          } else {
            __ z_lgr(Z_ARG3, laddr_reg);
          }
        }
        if (branchToEnd) {
          __ z_bru(done);
        } else {
          __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVCLE");
      }
      // No fallthru possible here.

      // MVCUnrolled: for short, aligned arrays.

      if (usedMVCUnrolled) {
        BLOCK_COMMENT("mode MVC unrolled {");
        stride = 8;

        // Generate unrolled MVC instructions.
        for (int ii = 32; ii > 1; ii--) {
          __ z_mvc(0, ii * stride-1, dst_reg, 0, src_reg); // ii*8 byte copy
          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }
        }

        pcMVCblock_b = __ pc();
        __ z_mvc(0, 1 * stride-1, dst_reg, 0, src_reg); // 8 byte copy
        if (branchToEnd) {
          __ z_bru(done);
        } else {
          __ z_br(Z_R14);
        }

        pcMVCblock_e = __ pc();
        Label MVC_ListEnd;
        __ bind(MVC_ListEnd);

        // This is an absolute fast path:
        // - Array len in bytes must be not greater than 256.
        // - Array len in bytes must be an integer mult of DW
        //   to save expensive handling of trailing bytes.
        // - Argument restore is not done,
        //   i.e. previous code must not alter arguments (this code doesn't either).

        __ bind(doMVCUnrolled);

        // Avoid mul, prefer shift where possible.
        // Combine shift right (for #DW) with shift left (for block size).
        // Set CC for zero test below (asm_assert).
        // Note: #bytes comes in Z_R1, #DW in len_reg.
        unsigned int MVCblocksize    = pcMVCblock_e - pcMVCblock_b;
        unsigned int logMVCblocksize = 0xffffffffU; // Pacify compiler ("used uninitialized" warning).

        if (log2_size > 0) { // Len was scaled into Z_R1.
          switch (MVCblocksize) {

            case  8: logMVCblocksize = 3;
                     __ z_ltgr(Z_R0, Z_R1); // #bytes is index
                     break;                 // reasonable size, use shift

            case 16: logMVCblocksize = 4;
                     __ z_slag(Z_R0, Z_R1, logMVCblocksize-log2_size);
                     break;                 // reasonable size, use shift

            default: logMVCblocksize = 0;
                     __ z_ltgr(Z_R0, len_reg); // #DW for mul
                     break;                    // all other sizes: use mul
          }
        } else {
          guarantee(log2_size, "doMVCUnrolled: only for DW entities");
        }

        // This test (and branch) is redundant. Previous code makes sure that
        //  - element count > 0
        //  - element size == 8.
        // Thus, len reg should never be zero here. We insert an asm_assert() here,
        // just to double-check and to be on the safe side.
        __ asm_assert(false, "zero len cannot occur", 99);
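        // Each unrolled block is assumed to occupy MVCblocksize code bytes;
        // branching to MVC_ListEnd - #DW * MVCblocksize then selects the
        // block that copies exactly #DW doublewords.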
        __ z_larl(Z_R1, MVC_ListEnd); // Get addr of last instr block.
        // Avoid mul, prefer shift where possible.
        if (logMVCblocksize == 0) {
          __ z_mghi(Z_R0, MVCblocksize);
        }
        __ z_slgr(Z_R1, Z_R0);
        __ z_br(Z_R1);
        BLOCK_COMMENT("} mode MVC unrolled");
      }
      // No fallthru possible here.

      // MVC execute template
      // Must always generate. Usage may be switched on below.
      // There is no suitable place after here to put the template.
      __ bind(MVC_template);
      __ z_mvc(0,0,dst_reg,0,src_reg); // Instr template, never exec directly!
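      // When executed via EX/EXRL, the low-order byte of the length register
      // is OR'ed into the template's length field (zero here), so the
      // executed MVC moves (length & 0xff) + 1 bytes.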


      // MVC Loop: for medium-sized arrays

      // Only for DW aligned arrays (src and dst).
      // #bytes to copy must be at least 256!!!
      // Non-aligned cases handled separately.
      stride     = 256;
      stride_reg = Z_R1;   // Holds #bytes when control arrives here.
      ix_reg     = Z_ARG3; // Alias for len_reg.


      if (usedMVCLOOP) {
        BLOCK_COMMENT("mode MVC loop {");
        __ bind(doMVCLOOP);

        __ z_lcgr(ix_reg, Z_R1);      // Ix runs from -(n-2)*stride to 1*stride (inclusive).
        __ z_llill(stride_reg, stride);
        __ add2reg(ix_reg, 2*stride); // Thus: increment ix by 2*stride.

        __ bind(doMVCLOOPiterate);
        __ z_mvc(0, stride-1, dst_reg, 0, src_reg);
        __ add2reg(dst_reg, stride);
        __ add2reg(src_reg, stride);
        __ bind(doMVCLOOPcount);
        __ z_brxlg(ix_reg, stride_reg, doMVCLOOPiterate);

        // Don't use add2reg() here, since we must set the condition code!
        __ z_aghi(ix_reg, -2*stride); // Compensate incr from above: zero diff means "all copied".

        if (restoreArgs) {
          __ z_lcgr(Z_R1, ix_reg);    // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
          __ z_brnz(doMVCgeneral);    // We're not done yet, ix_reg is not zero.

          // ARG1, ARG2, and ARG3 were altered by the code above, so restore them building on save_reg.
          __ z_slgr(dst_reg, save_reg); // copied #bytes
          __ z_slgr(src_reg, dst_reg);  // = ARG1 (now restored)
          if (log2_size) {
            __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3.
          } else {
            __ z_lgr(Z_ARG3, dst_reg);
          }
          __ z_lgr(Z_ARG2, save_reg); // ARG2 now restored.

          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }

        } else {
          if (branchToEnd) {
            __ z_brz(done);                        // CC set by aghi instr.
          } else {
            __ z_bcr(Assembler::bcondZero, Z_R14); // We're all done if zero.
          }

          __ z_lcgr(Z_R1, ix_reg); // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
          // __ z_bru(doMVCgeneral);  // fallthru
        }
        usedMVCgeneral = true;
        BLOCK_COMMENT("} mode MVC loop");
      }
      // Fallthru to doMVCgeneral

      // MVCgeneral: for short, unaligned arrays, after other copy operations

      // Somewhat expensive due to use of EX instruction, but simple.
      if (usedMVCgeneral) {
        BLOCK_COMMENT("mode MVC general {");
        __ bind(doMVCgeneral);

        __ add2reg(len_reg, -1, Z_R1);      // Get #bytes-1 for EXECUTE.
        if (VM_Version::has_ExecuteExtensions()) {
          __ z_exrl(len_reg, MVC_template); // Execute MVC with variable length.
        } else {
          __ z_larl(Z_R1, MVC_template);    // Get addr of instr template.
          __ z_ex(len_reg, 0, Z_R0, Z_R1);  // Execute MVC with variable length.
        }                                   // penalty: 9 ticks

        if (restoreArgs) {
          // ARG1, ARG2, and ARG3 were altered by code executed before, so restore them building on save_reg.
          __ z_slgr(dst_reg, save_reg); // Copied #bytes without the "doMVCgeneral" chunk
          __ z_slgr(src_reg, dst_reg);  // = ARG1 (now restored), was not advanced for "doMVCgeneral" chunk
          __ add2reg_with_index(dst_reg, 1, len_reg, dst_reg); // Len of executed MVC was not accounted for, yet.
          if (log2_size) {
            __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3
          } else {
            __ z_lgr(Z_ARG3, dst_reg);
          }
          __ z_lgr(Z_ARG2, save_reg); // ARG2 now restored.
        }

        if (usedMVC) {
          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }
        } else {
          if (!branchToEnd) __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVC general");
      }
      // Fallthru possible if following block not generated.

      // MVC: for short, unaligned arrays

      // Somewhat expensive due to use of EX instruction, but simple. penalty: 9 ticks.
      // Differs from doMVCgeneral in reconstruction of ARG2, ARG3, and ARG4.
      if (usedMVC) {
        BLOCK_COMMENT("mode MVC {");
        __ bind(doMVC);

        // get #bytes-1 for EXECUTE
        if (log2_size) {
          __ add2reg(Z_R1, -1);          // Length was scaled into Z_R1.
        } else {
          __ add2reg(Z_R1, -1, len_reg); // Length was not scaled.
        }

        if (VM_Version::has_ExecuteExtensions()) {
          __ z_exrl(Z_R1, MVC_template); // Execute MVC with variable length.
        } else {
          __ z_lgr(Z_R0, Z_R5);          // Save ARG4, may be unnecessary.
          __ z_larl(Z_R5, MVC_template); // Get addr of instr template.
          __ z_ex(Z_R1, 0, Z_R0, Z_R5);  // Execute MVC with variable length.
          __ z_lgr(Z_R5, Z_R0);          // Restore ARG4, may be unnecessary.
        }

        if (!branchToEnd) {
          __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVC");
      }

      __ bind(done);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT byte "); break;
        case 2:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT short"); break;
        case 4:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT int  "); break;
        case 8:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT long "); break;
        default: BLOCK_COMMENT("} ARRAYCOPY DISJOINT      "); break;
      }
    }
  }

  // Generate stub for conjoint array copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //   from:  Z_ARG1
  //   to:    Z_ARG2
  //   count: Z_ARG3 treated as signed
  void generate_conjoint_copy(bool aligned, int element_size, bool branchToEnd) {

    // This is the zarch specific stub generator for general array copy tasks.
    // It has the following prereqs and features:
    //
    // - Destructive overlap exists and is handled by reverse copy.
    // - Destructive overlap exists if the leftmost byte of the target
    //   does coincide with any of the source bytes (except the leftmost).
    // - Z_R0 and Z_R1 are KILLed by the stub routine (data and stride)
    // - Z_ARG1 and Z_ARG2 are USEd but preserved by the stub routine.
    // - Z_ARG3 is USEd but preserved by the stub routine.
    // - Z_ARG4 is used as index register and is thus KILLed.
    //
    {
      Register stride_reg = Z_R1;   // Stride & compare value in loop (negative element_size).
      Register   data_reg = Z_R0;   // Holds value of currently processed element.
      Register     ix_reg = Z_ARG4; // Holds byte index of currently processed element.
      Register    len_reg = Z_ARG3; // Holds length (in #elements) of arrays.
      Register    dst_reg = Z_ARG2; // Holds left  operand addr.
      Register    src_reg = Z_ARG1; // Holds right operand addr.

      assert(256%element_size == 0, "Element size must be power of 2.");
      assert(element_size     <= 8, "Can't handle more than DW units.");

      switch (element_size) {
        case 1:  BLOCK_COMMENT("ARRAYCOPY CONJOINT byte  {"); break;
        case 2:  BLOCK_COMMENT("ARRAYCOPY CONJOINT short {"); break;
        case 4:  BLOCK_COMMENT("ARRAYCOPY CONJOINT int   {"); break;
        case 8:  BLOCK_COMMENT("ARRAYCOPY CONJOINT long  {"); break;
        default: BLOCK_COMMENT("ARRAYCOPY CONJOINT       {"); break;
      }

      assert_positive_int(len_reg);

      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
      }

      unsigned int log2_size = exact_log2(element_size);
      if (log2_size) {
        __ z_sllg(ix_reg, len_reg, log2_size);
      } else {
        __ z_lgr(ix_reg, len_reg);
      }

      // Optimize reverse copy loop.
      // Main loop copies DW units which may be unaligned. Unaligned access adds some penalty ticks.
      // Unaligned DW access (neither fetch nor store) is DW-atomic, but should be alignment-atomic.
      // Preceding the main loop, some bytes are copied to obtain a DW-multiple remaining length.

      Label countLoop1;
      Label copyLoop1;
      Label skipBY;
      Label skipHW;
      int   stride = -8;

      __ load_const_optimized(stride_reg, stride); // Prepare for DW copy loop.

      if (element_size == 8)    // Nothing to do here.
        __ z_bru(countLoop1);
      else {                    // Do not generate dead code.
        __ z_tmll(ix_reg, 7);   // Check the "odd" bits.
        __ z_bre(countLoop1);   // There are none, very good!
      }

      if (log2_size == 0) {     // Handle leftover Byte.
        __ z_tmll(ix_reg, 1);
        __ z_bre(skipBY);
        __ z_lb(data_reg,   -1, ix_reg, src_reg);
        __ z_stcy(data_reg, -1, ix_reg, dst_reg);
        __ add2reg(ix_reg, -1); // Decrement delayed to avoid AGI.
        __ bind(skipBY);
        // fallthru
      }
      if (log2_size <= 1) {     // Handle leftover HW.
        __ z_tmll(ix_reg, 2);
        __ z_bre(skipHW);
        __ z_lhy(data_reg,  -2, ix_reg, src_reg);
        __ z_sthy(data_reg, -2, ix_reg, dst_reg);
        __ add2reg(ix_reg, -2); // Decrement delayed to avoid AGI.
        __ bind(skipHW);
        __ z_tmll(ix_reg, 4);
        __ z_bre(countLoop1);
        // fallthru
      }
      if (log2_size <= 2) {     // There are just 4 bytes (left) that need to be copied.
        __ z_ly(data_reg,  -4, ix_reg, src_reg);
        __ z_sty(data_reg, -4, ix_reg, dst_reg);
        __ add2reg(ix_reg, -4); // Decrement delayed to avoid AGI.
        __ z_bru(countLoop1);
      }

      // Control can never get to here. Never! Never ever!
      __ z_illtrap(0x99);
      __ bind(copyLoop1);
      __ z_lg(data_reg,  0, ix_reg, src_reg);
      __ z_stg(data_reg, 0, ix_reg, dst_reg);
      __ bind(countLoop1);
      __ z_brxhg(ix_reg, stride_reg, copyLoop1);

      if (!branchToEnd)
        __ z_br(Z_R14);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT byte "); break;
        case 2:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT short"); break;
        case 4:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT int  "); break;
        case 8:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT long "); break;
        default: BLOCK_COMMENT("} ARRAYCOPY CONJOINT      "); break;
      }
    }
  }

  // Generate stub for disjoint byte copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  address generate_disjoint_byte_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);

    // This is the zarch specific stub generator for byte array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    generate_disjoint_copy(aligned, 1, false, false);
    return __ addr_at(start_off);
  }


  address generate_disjoint_short_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for short array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    generate_disjoint_copy(aligned, 2, false, false);
    return __ addr_at(start_off);
  }


  address generate_disjoint_int_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for int array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    generate_disjoint_copy(aligned, 4, false, false);
    return __ addr_at(start_off);
  }


  address generate_disjoint_long_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for long array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    generate_disjoint_copy(aligned, 8, false, false);
    return __ addr_at(start_off);
  }


  address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for oop array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features.
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    unsigned int size      = UseCompressedOops ? 4 : 8;

    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
    if (dest_uninitialized) {
      decorators |= IS_DEST_UNINITIALIZED;
    }
    if (aligned) {
      decorators |= ARRAYCOPY_ALIGNED;
    }

    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->arraycopy_prologue(_masm, decorators, T_OBJECT, Z_ARG1, Z_ARG2, Z_ARG3);

    generate_disjoint_copy(aligned, size, true, true);

    bs->arraycopy_epilogue(_masm, decorators, T_OBJECT, Z_ARG2, Z_ARG3, true);

    return __ addr_at(start_off);
  }


  address generate_conjoint_byte_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping byte array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    address nooverlap_target = aligned ? StubRoutines::arrayof_jbyte_disjoint_arraycopy()
                                       : StubRoutines::jbyte_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 0); // Branch away to nooverlap_target if disjoint.
    generate_conjoint_copy(aligned, 1, false);

    return __ addr_at(start_off);
  }


  address generate_conjoint_short_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping short array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features:
    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    address nooverlap_target = aligned ? StubRoutines::arrayof_jshort_disjoint_arraycopy()
                                       : StubRoutines::jshort_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 1); // Branch away to nooverlap_target if disjoint.
    generate_conjoint_copy(aligned, 2, false);

    return __ addr_at(start_off);
  }

  address generate_conjoint_int_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping int array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features:

    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    address nooverlap_target = aligned ? StubRoutines::arrayof_jint_disjoint_arraycopy()
                                       : StubRoutines::jint_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 2); // Branch away to nooverlap_target if disjoint.
    generate_conjoint_copy(aligned, 4, false);

    return __ addr_at(start_off);
  }

  address generate_conjoint_long_copy(bool aligned, const char * name) {
    StubCodeMark mark(this, "StubRoutines", name);
    // This is the zarch specific stub generator for overlapping long array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features:

    unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
    address nooverlap_target = aligned ? StubRoutines::arrayof_jlong_disjoint_arraycopy()
                                       : StubRoutines::jlong_disjoint_arraycopy();

    array_overlap_test(nooverlap_target, 3); // Branch away to nooverlap_target if disjoint.
    generate_conjoint_copy(aligned, 8, false);

    return __ addr_at(start_off);
|
1375 |
} |
|
1376 |
||
1377 |
address generate_conjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) { |
|
1378 |
StubCodeMark mark(this, "StubRoutines", name); |
|
1379 |
// This is the zarch specific stub generator for overlapping oop array copy. |
|
1380 |
// Refer to generate_conjoint_copy for a list of prereqs and features. |
|
1381 |
unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). |
|
1382 |
unsigned int size = UseCompressedOops ? 4 : 8; |
|
1383 |
unsigned int shift = UseCompressedOops ? 2 : 3; |
|
1384 |
||
1385 |
address nooverlap_target = aligned ? StubRoutines::arrayof_oop_disjoint_arraycopy(dest_uninitialized) |
|
1386 |
: StubRoutines::oop_disjoint_arraycopy(dest_uninitialized); |
|
1387 |
||
1388 |
// Branch to disjoint_copy (if applicable) before pre_barrier to avoid double pre_barrier. |
|
1389 |
array_overlap_test(nooverlap_target, shift); // Branch away to nooverlap_target if disjoint. |
|
1390 |
||
50728 | 1391 |
DecoratorSet decorators = IN_HEAP | IS_ARRAY; |
49484
ee8fa73b90f9
8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents:
49455
diff
changeset
|
1392 |
if (dest_uninitialized) { |
50728 | 1393 |
decorators |= IS_DEST_UNINITIALIZED; |
49484
ee8fa73b90f9
8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents:
49455
diff
changeset
|
1394 |
} |
ee8fa73b90f9
8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents:
49455
diff
changeset
|
1395 |
if (aligned) { |
ee8fa73b90f9
8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents:
49455
diff
changeset
|
1396 |
decorators |= ARRAYCOPY_ALIGNED; |
ee8fa73b90f9
8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents:
49455
diff
changeset
|
1397 |
} |
ee8fa73b90f9
8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents:
49455
diff
changeset
|
1398 |
|
49754 | 1399 |
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); |
49484
ee8fa73b90f9
8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents:
49455
diff
changeset
|
1400 |
bs->arraycopy_prologue(_masm, decorators, T_OBJECT, Z_ARG1, Z_ARG2, Z_ARG3); |
42065 | 1401 |
|
1402 |
generate_conjoint_copy(aligned, size, true); // Must preserve ARG2, ARG3. |
|
1403 |
||
49484
ee8fa73b90f9
8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents:
49455
diff
changeset
|
1404 |
bs->arraycopy_epilogue(_masm, decorators, T_OBJECT, Z_ARG2, Z_ARG3, true); |
42065 | 1405 |
|
1406 |
return __ addr_at(start_off); |
|
1407 |
} |
|
1408 |
||
1409 |
||
1410 |
void generate_arraycopy_stubs() { |
|
1411 |
||
1412 |
// Note: the disjoint stubs must be generated first, some of |
|
1413 |
// the conjoint stubs use them. |
|
1414 |
StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy (false, "jbyte_disjoint_arraycopy"); |
|
1415 |
StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); |
|
1416 |
StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy (false, "jint_disjoint_arraycopy"); |
|
1417 |
StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy (false, "jlong_disjoint_arraycopy"); |
|
1418 |
StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy (false, "oop_disjoint_arraycopy", false); |
|
1419 |
StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy (false, "oop_disjoint_arraycopy_uninit", true); |
|
1420 |
||
1421 |
StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy (true, "arrayof_jbyte_disjoint_arraycopy"); |
|
1422 |
StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy"); |
|
1423 |
StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy (true, "arrayof_jint_disjoint_arraycopy"); |
|
1424 |
StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy (true, "arrayof_jlong_disjoint_arraycopy"); |
|
1425 |
StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy (true, "arrayof_oop_disjoint_arraycopy", false); |
|
1426 |
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy (true, "arrayof_oop_disjoint_arraycopy_uninit", true); |
|
1427 |
||
1428 |
StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy (false, "jbyte_arraycopy"); |
|
1429 |
StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); |
|
1430 |
StubRoutines::_jint_arraycopy = generate_conjoint_int_copy (false, "jint_arraycopy"); |
|
1431 |
StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy (false, "jlong_arraycopy"); |
|
1432 |
StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy (false, "oop_arraycopy", false); |
|
1433 |
StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy (false, "oop_arraycopy_uninit", true); |
|
1434 |
||
1435 |
StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy (true, "arrayof_jbyte_arraycopy"); |
|
1436 |
StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy"); |
|
1437 |
StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy (true, "arrayof_jint_arraycopy"); |
|
1438 |
StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy (true, "arrayof_jlong_arraycopy"); |
|
1439 |
StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy (true, "arrayof_oop_arraycopy", false); |
|
1440 |
StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy (true, "arrayof_oop_arraycopy_uninit", true); |
|
1441 |
} |
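
  // Illustration of the ordering constraint above (a sketch in C-style
  // pseudocode, not generated code): each conjoint stub embeds the entry
  // address of its disjoint sibling, so the sibling must exist first.
  // E.g. for jbyte_arraycopy:
  //
  //   void jbyte_arraycopy(jbyte* from, jbyte* to, size_t count) {
  //     if (!overlap(from, to, count)) goto jbyte_disjoint_arraycopy;  // array_overlap_test
  //     /* copy such that overlapping operands are handled correctly */
  //   }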

  void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) {

    // safefetch signatures:
    //   int      SafeFetch32(int*      adr, int      errValue);
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
    //
    // arguments:
    //   Z_ARG1 = adr
    //   Z_ARG2 = errValue
    //
    // result:
    //   Z_RET  = *adr or errValue

    StubCodeMark mark(this, "StubRoutines", name);

    // entry point
    // Load *adr into Z_ARG2, may fault.
    *entry = *fault_pc = __ pc();
    switch (size) {
      case 4:
        // Sign extended int32_t.
        __ z_lgf(Z_ARG2, 0, Z_ARG1);
        break;
      case 8:
        // int64_t
        __ z_lg(Z_ARG2, 0, Z_ARG1);
        break;
      default:
        ShouldNotReachHere();
    }

    // Return errValue or *adr.
    *continuation_pc = __ pc();
    __ z_lgr(Z_RET, Z_ARG2);
    __ z_br(Z_R14);
  }
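
  // Sketch of how the runtime uses the stub generated above (illustrative
  // only; the actual SafeFetch32/SafeFetchN wrappers live elsewhere in the VM):
  //
  //   int v = SafeFetch32((int*)addr, -1);
  //   // If the load from addr faults, the signal handler resumes execution
  //   // at *continuation_pc with errValue (-1) still in Z_ARG2, so v == -1
  //   // instead of a crash.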

  // Call interface for AES_encryptBlock, AES_decryptBlock stubs.
  //
  //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
  //   Z_ARG2 - destination data block. Ptr to leftmost byte to be stored.
  //            For in-place encryption/decryption, ARG1 and ARG2 can point
  //            to the same piece of storage.
  //   Z_ARG3 - Crypto key address (expanded key). The first n bits of
  //            the expanded key constitute the original AES-<n> key (see below).
  //
  //   Z_RET  - return value. First unprocessed byte offset in src buffer.
  //
  // Some remarks:
  //   The crypto key, as passed from the caller to these encryption stubs,
  //   is a so-called expanded key. It is derived from the original key
  //   by the Rijndael key schedule, see http://en.wikipedia.org/wiki/Rijndael_key_schedule
  //   With the expanded key, the cipher/decipher task is decomposed into
  //   multiple, less complex steps, called rounds. Sun SPARC and Intel
  //   processors obviously implement support for those less complex steps.
  //   z/Architecture provides instructions for full cipher/decipher complexity.
  //   Therefore, we need the original, not the expanded key here.
  //   Luckily, the first n bits of an AES-<n> expanded key are formed
  //   by the original key itself. That takes us out of trouble. :-)
  //   The key length (in bytes) relation is as follows:
  //     original    expanded   rounds   key bit   keylen
  //    key bytes   key bytes            length    in words
  //           16         176       11       128       44
  //           24         208       13       192       52
  //           32         240       15       256       60
  //
  // The crypto instructions used in the AES* stubs have some specific register requirements.
  //   Z_R0 holds the crypto function code. Please refer to the KM/KMC instruction
  //        description in the "z/Architecture Principles of Operation" manual for details.
  //   Z_R1 holds the parameter block address. The parameter block contains the cryptographic key
  //        (KM instruction) and the chaining value (KMC instruction).
  //   dst  must designate an even-numbered register, holding the address of the output message.
  //   src  must designate an even/odd register pair, holding the address/length of the original message.

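  // Worked example of the keylen discrimination used by the stubs below
  // (values taken from the table above): an AES-128 expanded key comprises
  // 44 four-byte words, AES-192 has 52, AES-256 has 60. The stubs read that
  // word count from the key array and compare it against the AES-192 value:
  //   keylen <  52  ->  AES-128
  //   keylen == 52  ->  AES-192
  //   keylen >  52  ->  AES-256
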
  // Helper function which generates code to
  //  - load the function code into register fCode (== Z_R0).
  //  - load the data block length (depends on cipher function) into register srclen if requested.
  //  - is_decipher switches between cipher/decipher function codes.
  //  - set_len requests (if true) loading the data block length into register srclen.
  void generate_load_AES_fCode(Register keylen, Register fCode, Register srclen, bool is_decipher) {

    BLOCK_COMMENT("Set fCode {"); {
      Label fCode_set;
      int   mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
      bool  identical_dataBlk_len = (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk)
                                 && (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
      // Expanded key length is 44/52/60 * 4 bytes for AES-128/AES-192/AES-256.
      __ z_cghi(keylen, 52);  // Check only once at the beginning. keylen and fCode may share the same register.

      __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode);
      if (!identical_dataBlk_len) {
        __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
      }
      __ z_brl(fCode_set);  // keyLen <  52: AES128

      __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode);
      if (!identical_dataBlk_len) {
        __ z_lghi(srclen, VM_Version::Cipher::_AES192_dataBlk);
      }
      __ z_bre(fCode_set);  // keyLen == 52: AES192

      __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
      if (!identical_dataBlk_len) {
        __ z_lghi(srclen, VM_Version::Cipher::_AES256_dataBlk);
      }
      // __ z_brh(fCode_set);  // keyLen >  52: AES256  // fallthru

      __ bind(fCode_set);
      if (identical_dataBlk_len) {
        __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
      }
    }
    BLOCK_COMMENT("} Set fCode");
  }

  // Push a parameter block for the cipher/decipher instruction on the stack.
  // Layout of the additional stack space allocated for AES_cipherBlockChaining:
  //
  //   |        |
  //   +--------+ <-- SP before expansion
  //   |        |
  //   :        :  alignment loss, 0..(AES_parmBlk_align-8) bytes
  //   |        |
  //   +--------+
  //   |        |
  //   :        :  space for parameter block, size VM_Version::Cipher::_AES*_parmBlk_C
  //   |        |
  //   +--------+ <-- parmBlk, octoword-aligned, start of parameter block
  //   |        |
  //   :        :  additional stack space for spills etc., size AES_parmBlk_addspace, DW @ Z_SP not usable!!!
  //   |        |
  //   +--------+ <-- Z_SP after expansion

  void generate_push_Block(int dataBlk_len, int parmBlk_len, int crypto_fCode,
                           Register parmBlk, Register keylen, Register fCode, Register cv, Register key) {
    const int AES_parmBlk_align    = 32;  // octoword alignment
    const int AES_parmBlk_addspace = 24;  // Must be sufficiently large to hold all spilled registers
                                          // (currently 2) PLUS 1 DW for the frame pointer.

    const int cv_len     = dataBlk_len;
    const int key_len    = parmBlk_len - cv_len;
    // This len must be known at JIT compile time. Only then are we able to recalc the SP before resize.
    // We buy this knowledge by wasting some (up to AES_parmBlk_align) bytes of stack space.
    const int resize_len = cv_len + key_len + AES_parmBlk_align + AES_parmBlk_addspace;

    // Use parmBlk as temp reg here to hold the frame pointer.
    __ resize_frame(-resize_len, parmBlk, true);

    // Calculate parmBlk address from updated (resized) SP.
    __ add2reg(parmBlk, resize_len - (cv_len + key_len), Z_SP);
    __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // Align parameter block.

    // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace+8, parmBlk).
    __ z_stg(keylen, -8, parmBlk);         // Spill keylen for later use.

    // Calculate (SP before resize) from updated SP.
    __ add2reg(keylen, resize_len, Z_SP);  // keylen holds prev SP for now.
    __ z_stg(keylen, -16, parmBlk);        // Spill prev SP for easy revert.

    __ z_mvc(0, cv_len-1, parmBlk, 0, cv);         // Copy cv.
    __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);  // Copy key.
    __ z_lghi(fCode, crypto_fCode);
  }
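
  // Worked example for the resize arithmetic above (comments only, assuming
  // the AES-128 KMC parameter block is a 16-byte chaining value followed by
  // the 16-byte key, i.e. parmBlk_len == 32):
  //   cv_len     = 16, key_len = 32 - 16 = 16
  //   resize_len = 16 + 16 + 32 + 24 = 88 bytes
  // Up to AES_parmBlk_align-8 = 24 of these bytes are alignment loss; the
  // z_nill above clears the low five address bits to reach the next lower
  // octoword boundary.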

  // NOTE:
  //   Before returning, the stub has to copy the chaining value from
  //   the parmBlk, where it was updated by the crypto instruction, back
  //   to the chaining value array the address of which was passed in the cv argument.
  //   As all the available registers are used and modified by KMC, we need to save
  //   the key length across the KMC instruction. We do so by spilling it to the stack,
  //   just preceding the parmBlk (at (parmBlk - 8)).
  void generate_push_parmBlk(Register keylen, Register fCode, Register parmBlk, Register key, Register cv, bool is_decipher) {
    int mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
    Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;

    BLOCK_COMMENT("push parmBlk {");
    if (VM_Version::has_Crypto_AES()   ) { __ z_cghi(keylen, 52); }
    if (VM_Version::has_Crypto_AES128()) { __ z_brl(parmBlk_128); }  // keyLen <  52: AES128
    if (VM_Version::has_Crypto_AES192()) { __ z_bre(parmBlk_192); }  // keyLen == 52: AES192
    if (VM_Version::has_Crypto_AES256()) { __ z_brh(parmBlk_256); }  // keyLen >  52: AES256

    // Security net: requested AES function not available on this CPU.
    // NOTE:
    //   As of now (March 2015), this safety net is not required. JCE policy files limit the
    //   cryptographic strength of the keys used to 128 bit. If we have AES hardware support
    //   at all, we have at least AES-128.
    __ stop_static("AES key strength not supported by CPU. Use -XX:-UseAES as remedy.", 0);

    if (VM_Version::has_Crypto_AES256()) {
      __ bind(parmBlk_256);
      generate_push_Block(VM_Version::Cipher::_AES256_dataBlk,
                          VM_Version::Cipher::_AES256_parmBlk_C,
                          VM_Version::Cipher::_AES256 + mode,
                          parmBlk, keylen, fCode, cv, key);
      if (VM_Version::has_Crypto_AES128() || VM_Version::has_Crypto_AES192()) {
        __ z_bru(parmBlk_set);  // Fallthru otherwise.
      }
    }

    if (VM_Version::has_Crypto_AES192()) {
      __ bind(parmBlk_192);
      generate_push_Block(VM_Version::Cipher::_AES192_dataBlk,
                          VM_Version::Cipher::_AES192_parmBlk_C,
                          VM_Version::Cipher::_AES192 + mode,
                          parmBlk, keylen, fCode, cv, key);
      if (VM_Version::has_Crypto_AES128()) {
        __ z_bru(parmBlk_set);  // Fallthru otherwise.
      }
    }

    if (VM_Version::has_Crypto_AES128()) {
      __ bind(parmBlk_128);
      generate_push_Block(VM_Version::Cipher::_AES128_dataBlk,
                          VM_Version::Cipher::_AES128_parmBlk_C,
                          VM_Version::Cipher::_AES128 + mode,
                          parmBlk, keylen, fCode, cv, key);
      // Fallthru
    }

    __ bind(parmBlk_set);
    BLOCK_COMMENT("} push parmBlk");
  }

  // Pop a parameter block from the stack. The chaining value portion of the parameter block
  // is copied back to the cv array as it is needed for subsequent cipher steps.
  // The keylen value as well as the original SP (before resizing) were pushed to the stack
  // when pushing the parameter block.
  void generate_pop_parmBlk(Register keylen, Register parmBlk, Register key, Register cv) {

    BLOCK_COMMENT("pop parmBlk {");
    bool identical_dataBlk_len = (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk) &&
                                 (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
    if (identical_dataBlk_len) {
      int cv_len = VM_Version::Cipher::_AES128_dataBlk;
      __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
    } else {
      int cv_len;
      Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
      __ z_lg(keylen, -8, parmBlk);  // Restore keylen.
      __ z_cghi(keylen, 52);
      if (VM_Version::has_Crypto_AES256()) __ z_brh(parmBlk_256);  // keyLen >  52: AES256
      if (VM_Version::has_Crypto_AES192()) __ z_bre(parmBlk_192);  // keyLen == 52: AES192
      // if (VM_Version::has_Crypto_AES128()) __ z_brl(parmBlk_128);  // keyLen <  52: AES128  // fallthru

      // Security net: there is none here. If we needed one, we should have
      // fallen into it already when pushing the parameter block.
      if (VM_Version::has_Crypto_AES128()) {
        __ bind(parmBlk_128);
        cv_len = VM_Version::Cipher::_AES128_dataBlk;
        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
        if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
          __ z_bru(parmBlk_set);
        }
      }

      if (VM_Version::has_Crypto_AES192()) {
        __ bind(parmBlk_192);
        cv_len = VM_Version::Cipher::_AES192_dataBlk;
        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
        if (VM_Version::has_Crypto_AES256()) {
          __ z_bru(parmBlk_set);
        }
      }

      if (VM_Version::has_Crypto_AES256()) {
        __ bind(parmBlk_256);
        cv_len = VM_Version::Cipher::_AES256_dataBlk;
        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
        // __ z_bru(parmBlk_set);  // fallthru
      }
      __ bind(parmBlk_set);
    }
    __ z_lg(Z_SP, -16, parmBlk);  // Revert resize_frame_absolute. Z_SP saved by push_parmBlk.
    BLOCK_COMMENT("} pop parmBlk");
  }

  // Compute AES encrypt/decrypt function.
  void generate_AES_cipherBlock(bool is_decipher) {
    // Incoming arguments.
    Register       from    = Z_ARG1;  // source byte array
    Register       to      = Z_ARG2;  // destination byte array
    Register       key     = Z_ARG3;  // expanded key array

    const Register keylen  = Z_R0;    // Temporarily (until fCode is set) holds the expanded key array length.

    // Register definitions as required by KM instruction.
    const Register fCode   = Z_R0;    // crypto function code
    const Register parmBlk = Z_R1;    // parameter block address (points to crypto key)
    const Register src     = Z_ARG1;  // Must be even reg (KM requirement).
    const Register srclen  = Z_ARG2;  // Must be odd reg and pair with src. Overwrites destination address.
    const Register dst     = Z_ARG3;  // Must be even reg (KM requirement). Overwrites expanded key address.

    // Read key len of expanded key (in 4-byte words).
    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    // Copy arguments to registers as required by crypto instruction.
    __ z_lgr(parmBlk, key);       // crypto key (in T_INT array)
    __ lgr_if_needed(src, from);  // Copy src address. Will not emit, src/from are identical.
    __ z_lgr(dst, to);            // Copy dst address, even register required.

    // Construct function code into fCode(Z_R0), data block length into srclen(Z_ARG2).
    generate_load_AES_fCode(keylen, fCode, srclen, is_decipher);

    __ km(dst, src);  // Cipher the message.

    __ z_br(Z_R14);
  }

  // Compute AES encrypt function.
  address generate_AES_encryptBlock(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    generate_AES_cipherBlock(false);

    return __ addr_at(start_off);
  }

  // Compute AES decrypt function.
  address generate_AES_decryptBlock(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    generate_AES_cipherBlock(true);

    return __ addr_at(start_off);
  }

  // These stubs receive the addresses of the cryptographic key and of the chaining value as two separate
  // arguments (registers "key" and "cv", respectively). The KMC instruction, on the other hand, requires
  // chaining value and key to be, in this sequence, adjacent in storage. Thus, we need to allocate some
  // thread-local working storage. Using heap memory incurs all the hassles of allocating/freeing.
  // Stack space, on the contrary, is deallocated automatically when we return from the stub to the caller.
  // *** WARNING ***
  // Please note that we do not formally allocate stack space, nor do we
  // update the stack pointer. Therefore, no function calls are allowed
  // and nobody else must use the stack range where the parameter block
  // is located.
  // We align the parameter block to the next available octoword.
  //
  // Compute chained AES encrypt/decrypt function.
  void generate_AES_cipherBlockChaining(bool is_decipher) {

    Register       from    = Z_ARG1;  // source byte array (clear text)
    Register       to      = Z_ARG2;  // destination byte array (ciphered)
    Register       key     = Z_ARG3;  // expanded key array
    Register       cv      = Z_ARG4;  // chaining value
    const Register msglen  = Z_ARG5;  // Total length of the msg to be encrypted. Value must be returned
                                      // in Z_RET upon completion of this stub. Is 32-bit integer.

    const Register keylen  = Z_R0;    // Expanded key length, as read from key array. Temp only.
    const Register fCode   = Z_R0;    // crypto function code
    const Register parmBlk = Z_R1;    // parameter block address (points to crypto key)
    const Register src     = Z_ARG1;  // is Z_R2
    const Register srclen  = Z_ARG2;  // Overwrites destination address.
    const Register dst     = Z_ARG3;  // Overwrites key address.

    // Read key len of expanded key (in 4-byte words).
    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
    // Construct function code in fCode (Z_R0).
    generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, is_decipher);

    // Prepare other registers for instruction.
    __ lgr_if_needed(src, from);  // Copy src address. Will not emit, src/from are identical.
    __ z_lgr(dst, to);
    __ z_llgfr(srclen, msglen);   // Zero-extend: the length is passed as int, not as long as the instruction requires.

    __ kmc(dst, src);  // Cipher the message.

    generate_pop_parmBlk(keylen, parmBlk, key, cv);

    __ z_llgfr(Z_RET, msglen);  // Zero-extend: the length is passed as int, not as long as required.
    __ z_br(Z_R14);
  }

  // Compute chained AES encrypt function.
  address generate_cipherBlockChaining_AES_encrypt(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    generate_AES_cipherBlockChaining(false);

    return __ addr_at(start_off);
  }

  // Compute chained AES decrypt function.
  address generate_cipherBlockChaining_AES_decrypt(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    generate_AES_cipherBlockChaining(true);

    return __ addr_at(start_off);
  }


  // Compute GHASH function.
  address generate_ghash_processBlocks() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    const Register state   = Z_ARG1;
    const Register subkeyH = Z_ARG2;
    const Register data    = Z_ARG3;  // 1st of even-odd register pair.
    const Register blocks  = Z_ARG4;
    const Register len     = blocks;  // 2nd of even-odd register pair.

    const int param_block_size = 4 * 8;
    const int frame_resize = param_block_size + 8;  // Extra space for copy of fp.

    // Reserve stack space for parameter block (R1).
    __ z_lgr(Z_R1, Z_SP);
    __ resize_frame(-frame_resize, Z_R0, true);
    __ z_aghi(Z_R1, -param_block_size);

    // Fill parameter block.
    __ z_mvc(Address(Z_R1)    , Address(state)  , 16);
    __ z_mvc(Address(Z_R1, 16), Address(subkeyH), 16);

    // R4+5: data pointer + length
    __ z_llgfr(len, blocks);  // Cast to 64-bit.

    // R0: function code
    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_GHASH);

    // Compute.
    __ z_sllg(len, len, 4);  // In bytes.
    __ kimd(data);

    // Copy back result and free parameter block.
    __ z_mvc(Address(state), Address(Z_R1), 16);
    __ z_xc(Address(Z_R1), param_block_size, Address(Z_R1));
    __ z_aghi(Z_SP, frame_resize);

    __ z_br(Z_R14);

    return __ addr_at(start_off);
  }
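
  // Illustrative arithmetic for the KIMD setup above (comments only): the
  // parameter block holds 16 bytes of hash state followed by the 16-byte
  // subkey H. GHASH consumes 16-byte blocks, so the block count converts to
  // a byte length by a shift of 4; e.g. blocks = 3 gives len = 3 << 4 = 48.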


  // Call interface for all SHA* stubs.
  //
  //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
  //   Z_ARG2 - current SHA state. Ptr to state area. This area serves as
  //            parameter block as required by the crypto instruction.
  //   Z_ARG3 - current byte offset in source data block.
  //   Z_ARG4 - last byte offset in source data block.
  //            (Z_ARG4 - Z_ARG3) gives the #bytes remaining to be processed.
  //
  //   Z_RET  - return value. First unprocessed byte offset in src buffer.
  //
  // A few notes on the call interface:
  //  - All stubs, whether they are single-block or multi-block, are assumed to
  //    digest an integer multiple of the data block length of data. All data
  //    blocks are digested using the intermediate message digest (KIMD) instruction.
  //    Special end processing, as done by the KLMD instruction, seems to be
  //    emulated by the calling code.
  //
  //  - Z_ARG1 addresses the first byte of source data. The offset (Z_ARG3) is
  //    already accounted for.
  //
  //  - The current SHA state (the intermediate message digest value) is contained
  //    in an area addressed by Z_ARG2. The area size depends on the SHA variant
  //    and is accessible via the enum VM_Version::MsgDigest::_SHA<n>_parmBlk_I
  //
  //  - The single-block stub is expected to digest exactly one data block, starting
  //    at the address passed in Z_ARG1.
  //
  //  - The multi-block stub is expected to digest all data blocks which start in
  //    the offset interval [srcOff(Z_ARG3), srcLimit(Z_ARG4)). The exact difference
  //    (srcLimit-srcOff), rounded up to the next multiple of the data block length,
  //    gives the number of blocks to digest. It must be assumed that the calling code
  //    provides for a large enough source data buffer.
  //
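  // Worked example of the multi-block round-up performed below (comments
  // only): SHA-1 uses 64-byte data blocks, so for srcOff = 0, srcLimit = 200
  //   srcBufLen = (200 - 0 + 63) & ~63 = 256
  // i.e. four full blocks are digested and 256 is returned as the offset of
  // the first unprocessed byte.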

  // Compute SHA-1 function.
  address generate_SHA1_stub(bool multiBlock, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    const Register srcBuff        = Z_ARG1;  // Points to first block to process (offset already added).
    const Register SHAState       = Z_ARG2;  // Only on entry. Reused soon thereafter for kimd register pairs.
    const Register srcOff         = Z_ARG3;  // int
    const Register srcLimit       = Z_ARG4;  // Only passed in multiBlock case. int

    const Register SHAState_local = Z_R1;
    const Register SHAState_save  = Z_ARG3;
    const Register srcBufLen      = Z_ARG2;  // Destroys state address, must be copied before.
    Label useKLMD, rtn;

    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA1);  // function code
    __ z_lgr(SHAState_local, SHAState);  // SHAState == parameter block

    if (multiBlock) {  // Process everything from offset to limit.

      // The following description is valid if we get a raw (unpimped) source data buffer,
      // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
      // the calling convention for these stubs is different. We leave the description in
      // to inform the reader what must be happening hidden in the calling code.
      //
      // The data block to be processed can have arbitrary length, i.e. its length does not
      // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
      // two different paths. If the length is an integer multiple, we use KIMD, saving us
      // the copying of the SHA state back and forth. If the length is odd, we copy the SHA state
      // to the stack, execute a KLMD instruction on it and copy the result back to the
      // caller's SHA state location.

      // Total #srcBuff blocks to process.
      if (VM_Version::has_DistinctOpnds()) {
        __ z_srk(srcBufLen, srcLimit, srcOff);                        // exact difference
        __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
        __ z_ark(srcLimit, srcOff, srcBufLen);  // SrcLimit temporarily holds return value.
        __ z_llgfr(srcBufLen, srcBufLen);       // Cast to 64-bit.
      } else {
        __ z_lgfr(srcBufLen, srcLimit);         // Exact difference. srcLimit passed as int.
        __ z_sgfr(srcBufLen, srcOff);           // SrcOff passed as int, now properly casted to long.
        __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);   // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
        __ z_lgr(srcLimit, srcOff);             // SrcLimit temporarily holds return value.
        __ z_agr(srcLimit, srcBufLen);
      }

      // Integral #blocks to digest?
      // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA1_dataBlk, or else we are in big trouble.
      // We insert an asm_assert into the KLMD case to guard against that.
      __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);
      __ z_brc(Assembler::bcondNotAllZero, useKLMD);

      // Process all full blocks.
      __ kimd(srcBuff);

      __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
    } else {  // Process one data block only.
      __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA1_dataBlk);  // #srcBuff bytes to process
      __ kimd(srcBuff);
      __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA1_dataBlk, srcOff);  // Offset of first unprocessed byte in buffer. No 32 to 64 bit extension needed.
    }

    __ bind(rtn);
    __ z_br(Z_R14);

    if (multiBlock) {
      __ bind(useKLMD);
#if 1
      // Security net: this stub is believed to be called for full-sized data blocks only.
      // NOTE: The following code is believed to be correct, but it is not tested.
      __ stop_static("SHA1 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
#endif
    }

    return __ addr_at(start_off);
  }

  // Compute SHA-256 function.
  address generate_SHA256_stub(bool multiBlock, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    const Register srcBuff        = Z_ARG1;
    const Register SHAState       = Z_ARG2;  // Only on entry. Reused soon thereafter.
    const Register SHAState_local = Z_R1;
    const Register SHAState_save  = Z_ARG3;
    const Register srcOff         = Z_ARG3;
    const Register srcLimit       = Z_ARG4;
    const Register srcBufLen      = Z_ARG2;  // Destroys state address, must be copied before.
    Label useKLMD, rtn;

    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA256);  // function code
    __ z_lgr(SHAState_local, SHAState);  // SHAState == parameter block

    if (multiBlock) {  // Process everything from offset to limit.
      // The following description is valid if we get a raw (unpimped) source data buffer,
      // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
      // the calling convention for these stubs is different. We leave the description in
      // to inform the reader what must be happening hidden in the calling code.
      //
      // The data block to be processed can have arbitrary length, i.e. its length does not
      // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
      // two different paths. If the length is an integer multiple, we use KIMD, saving us
      // the copying of the SHA state back and forth. If the length is odd, we copy the SHA state
      // to the stack, execute a KLMD instruction on it and copy the result back to the
      // caller's SHA state location.

      // Total #srcBuff blocks to process.
      if (VM_Version::has_DistinctOpnds()) {
        __ z_srk(srcBufLen, srcLimit, srcOff);                          // exact difference
        __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
        __ z_ark(srcLimit, srcOff, srcBufLen);  // SrcLimit temporarily holds return value.
        __ z_llgfr(srcBufLen, srcBufLen);       // Cast to 64-bit.
      } else {
        __ z_lgfr(srcBufLen, srcLimit);         // exact difference
        __ z_sgfr(srcBufLen, srcOff);
        __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
        __ z_lgr(srcLimit, srcOff);             // SrcLimit temporarily holds return value.
        __ z_agr(srcLimit, srcBufLen);
      }

      // Integral #blocks to digest?
      // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA256_dataBlk, or else we are in big trouble.
      // We insert an asm_assert into the KLMD case to guard against that.
      __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);
      __ z_brc(Assembler::bcondNotAllZero, useKLMD);

      // Process all full blocks.
      __ kimd(srcBuff);

      __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
    } else {  // Process one data block only.
      __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA256_dataBlk);  // #srcBuff bytes to process
      __ kimd(srcBuff);
      __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA256_dataBlk, srcOff);  // Offset of first unprocessed byte in buffer.
    }

    __ bind(rtn);
    __ z_br(Z_R14);

    if (multiBlock) {
      __ bind(useKLMD);
#if 1
      // Security net: this stub is believed to be called for full-sized data blocks only.
      // NOTE: The following code is believed to be correct, but it is not tested.
      __ stop_static("SHA256 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
#endif
    }

    return __ addr_at(start_off);
  }

  // Compute SHA-512 function.
  address generate_SHA512_stub(bool multiBlock, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    const Register srcBuff        = Z_ARG1;
    const Register SHAState       = Z_ARG2;  // Only on entry. Reused soon thereafter.
    const Register SHAState_local = Z_R1;
    const Register SHAState_save  = Z_ARG3;
    const Register srcOff         = Z_ARG3;
    const Register srcLimit       = Z_ARG4;
    const Register srcBufLen      = Z_ARG2;  // Destroys state address, must be copied before.
    Label useKLMD, rtn;

    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA512);  // function code
    __ z_lgr(SHAState_local, SHAState);  // SHAState == parameter block

    if (multiBlock) {  // Process everything from offset to limit.
      // The following description is valid if we get a raw (unpimped) source data buffer,
      // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
      // the calling convention for these stubs is different. We leave the description in
      // to inform the reader what must be happening hidden in the calling code.
      //
      // The data block to be processed can have arbitrary length, i.e. its length does not
      // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
      // two different paths. If the length is an integer multiple, we use KIMD, saving us
      // the copying of the SHA state back and forth. If the length is odd, we copy the SHA state
      // to the stack, execute a KLMD instruction on it and copy the result back to the
      // caller's SHA state location.

      // Total #srcBuff blocks to process.
      if (VM_Version::has_DistinctOpnds()) {
        __ z_srk(srcBufLen, srcLimit, srcOff);                          // exact difference
        __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
        __ z_ark(srcLimit, srcOff, srcBufLen);  // SrcLimit temporarily holds return value.
        __ z_llgfr(srcBufLen, srcBufLen);       // Cast to 64-bit.
      } else {
        __ z_lgfr(srcBufLen, srcLimit);         // exact difference
        __ z_sgfr(srcBufLen, srcOff);
        __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);  // round up
        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
        __ z_lgr(srcLimit, srcOff);             // SrcLimit temporarily holds return value.
        __ z_agr(srcLimit, srcBufLen);
      }

      // Integral #blocks to digest?
      // As a result of the calculations above, srcBufLen MUST be an integer
      // multiple of _SHA512_dataBlk, or else we are in big trouble.
      // We insert an asm_assert into the KLMD case to guard against that.
      __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);
      __ z_brc(Assembler::bcondNotAllZero, useKLMD);

      // Process all full blocks.
      __ kimd(srcBuff);

      __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
    } else {  // Process one data block only.
      __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA512_dataBlk);  // #srcBuff bytes to process
      __ kimd(srcBuff);
      __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA512_dataBlk, srcOff);  // Offset of first unprocessed byte in buffer.
    }

    __ bind(rtn);
    __ z_br(Z_R14);

    if (multiBlock) {
      __ bind(useKLMD);
#if 1
      // Security net: this stub is believed to be called for full-sized data blocks only.
      // NOTE: The following code is believed to be correct, but it is not tested.
      __ stop_static("SHA512 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
#endif
    }

    return __ addr_at(start_off);
  }


  /**
   *  Arguments:
   *
   *  Inputs:
   *   Z_ARG1    - int   crc
   *   Z_ARG2    - byte* buf
   *   Z_ARG3    - int   length (of buffer)
   *
   *  Result:
   *   Z_RET     - int   crc result
   **/
  // Compute CRC function (generic, for all polynomials).
  void generate_CRC_updateBytes(const char* name, Register table, bool invertCRC) {

    // arguments to kernel_crc32:
    Register       crc     = Z_ARG1;  // Current checksum, preset by caller or result from previous call, int.
    Register       data    = Z_ARG2;  // source byte array
    Register       dataLen = Z_ARG3;  // #bytes to process, int
//  Register       table   = Z_ARG4;  // crc table address. Preloaded and passed in by caller.
    const Register t0      = Z_R10;   // work reg for kernel* emitters
    const Register t1      = Z_R11;   // work reg for kernel* emitters
    const Register t2      = Z_R12;   // work reg for kernel* emitters
    const Register t3      = Z_R13;   // work reg for kernel* emitters

    assert_different_registers(crc, data, dataLen, table);

    // We pass these values as ints, not as longs as required by C calling convention.
    // Crc used as int.
    __ z_llgfr(dataLen, dataLen);

    __ resize_frame(-(6*8), Z_R0, true);  // Resize frame to provide add'l space for register spills.
    __ z_stmg(Z_R10, Z_R13, 1*8, Z_SP);   // Spill regs 10..13 to make them available as work registers.
    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, invertCRC);
    __ z_lmg(Z_R10, Z_R13, 1*8, Z_SP);    // Restore regs 10..13 from stack.
    __ resize_frame(+(6*8), Z_R0, true);  // Revert frame resize.

    __ z_llgfr(Z_RET, crc);  // Updated crc is function result. No copying required, just zero upper 32 bits.
    __ z_br(Z_R14);          // Result already in Z_RET == Z_ARG1.
  }
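
  // Note on invertCRC for the two wrappers below: they differ only in the
  // preloaded lookup table and this flag. CRC32 passes true, CRC32C passes
  // false; presumably kernel_crc32_1word complements the crc value on entry
  // and exit when the flag is set, matching the bit-inverted state that
  // java.util.zip.CRC32 maintains.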


  // Compute CRC32 function.
  address generate_CRC32_updateBytes(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    assert(UseCRC32Intrinsics, "should not generate this stub (%s) with CRC32 intrinsics disabled", name);

    BLOCK_COMMENT("CRC32_updateBytes {");
    Register table = Z_ARG4;  // crc32 table address.
    StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);

    generate_CRC_updateBytes(name, table, true);
    BLOCK_COMMENT("} CRC32_updateBytes");

    return __ addr_at(start_off);
  }


  // Compute CRC32C function.
  address generate_CRC32C_updateBytes(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).

    assert(UseCRC32CIntrinsics, "should not generate this stub (%s) with CRC32C intrinsics disabled", name);

    BLOCK_COMMENT("CRC32C_updateBytes {");
    Register table = Z_ARG4;  // crc32c table address.
    StubRoutines::zarch::generate_load_crc32c_table_addr(_masm, table);

    generate_CRC_updateBytes(name, table, false);
    BLOCK_COMMENT("} CRC32C_updateBytes");

    return __ addr_at(start_off);
  }


  // Arguments:
  //   Z_ARG1    - x address
  //   Z_ARG2    - x length
  //   Z_ARG3    - y address
  //   Z_ARG4    - y length
  //   Z_ARG5    - z address
  //   160[Z_SP] - z length
  address generate_multiplyToLen() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "multiplyToLen");

    address start = __ pc();

    const Register x    = Z_ARG1;
    const Register xlen = Z_ARG2;
    const Register y    = Z_ARG3;
    const Register ylen = Z_ARG4;
    const Register z    = Z_ARG5;
    // zlen is passed on the stack:
    // Address zlen(Z_SP, _z_abi(remaining_cargs));

    // Next registers will be saved on stack in multiply_to_len().
    const Register tmp1 = Z_tmp_1;
    const Register tmp2 = Z_tmp_2;
    const Register tmp3 = Z_tmp_3;
    const Register tmp4 = Z_tmp_4;
    const Register tmp5 = Z_R9;

    BLOCK_COMMENT("Entry:");

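    // The x/y lengths arrive as 32-bit Java ints; LLGFR zero-extends them
    // to 64 bits before the multiply loop uses them.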
    __ z_llgfr(xlen, xlen);
    __ z_llgfr(ylen, ylen);

    __ multiply_to_len(x, xlen, y, ylen, z, tmp1, tmp2, tmp3, tmp4, tmp5);

    __ z_br(Z_R14);  // Return to caller.

    return start;
  }

  void generate_initial() {
    // Generates all stubs and initializes the entry points.

    // Entry points that exist in all platforms.
    // Note: This is code that could be shared among different
    // platforms - however the benefit seems to be smaller than the
    // disadvantage of having a much more complicated generator
    // structure. See also comment in stubRoutines.hpp.
    StubRoutines::_forward_exception_entry = generate_forward_exception();

    StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
    StubRoutines::_catch_exception_entry = generate_catch_exception();

    // Build this early so it's available for the interpreter.
    StubRoutines::_throw_StackOverflowError_entry =
      generate_throw_exception("StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
    StubRoutines::_throw_delayed_StackOverflowError_entry =
      generate_throw_exception("delayed StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), false);
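    // (The delayed variant supports JEP 270, reserved stack areas for critical sections.)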

    //----------------------------------------------------------------------
    // Entry points that are platform specific.

    if (UseCRC32Intrinsics) {
      StubRoutines::_crc_table_adr = (address)StubRoutines::zarch::_crc_table;
      StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes");
    }

    if (UseCRC32CIntrinsics) {
      StubRoutines::_crc32c_table_addr = (address)StubRoutines::zarch::_crc32c_table;
      StubRoutines::_updateBytesCRC32C = generate_CRC32C_updateBytes("CRC32C_updateBytes");
    }

    // Compact string intrinsics: Translate table for string inflate intrinsic.
    // Used by trot instruction.
    StubRoutines::zarch::_trot_table_addr = (address)StubRoutines::zarch::_trot_table;
  }
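
  // Note: generate_initial() is limited to the stubs needed early in startup
  // (before universe/interpreter setup); generate_all() below provides the
  // rest. The constructor at the end of this class selects the phase via 'all'.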


  void generate_all() {
    // Generates all stubs and initializes the entry points.

    StubRoutines::zarch::_partial_subtype_check = generate_partial_subtype_check();

    // These entry points require SharedInfo::stack0 to be set up in non-core builds.
    StubRoutines::_throw_AbstractMethodError_entry          = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
    StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false);
    StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);

    // Support for verify_oop (must happen after universe_init).
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine();

    // Arraycopy stubs used by compilers.
    generate_arraycopy_stubs();

    // Safefetch stubs.
    generate_safefetch("SafeFetch32", sizeof(int),      &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN",  sizeof(intptr_t), &StubRoutines::_safefetchN_entry,  &StubRoutines::_safefetchN_fault_pc,  &StubRoutines::_safefetchN_continuation_pc);

    // Generate AES intrinsics code.
    if (UseAESIntrinsics) {
      StubRoutines::_aescrypt_encryptBlock = generate_AES_encryptBlock("AES_encryptBlock");
      StubRoutines::_aescrypt_decryptBlock = generate_AES_decryptBlock("AES_decryptBlock");
      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_AES_encrypt("AES_encryptBlock_chaining");
      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_AES_decrypt("AES_decryptBlock_chaining");
    }

    // Generate GHASH intrinsics code.
    if (UseGHASHIntrinsics) {
      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
    }

    // Generate SHA1/SHA256/SHA512 intrinsics code.
    if (UseSHA1Intrinsics) {
      StubRoutines::_sha1_implCompress   = generate_SHA1_stub(false, "SHA1_singleBlock");
      StubRoutines::_sha1_implCompressMB = generate_SHA1_stub(true,  "SHA1_multiBlock");
    }
    if (UseSHA256Intrinsics) {
      StubRoutines::_sha256_implCompress   = generate_SHA256_stub(false, "SHA256_singleBlock");
      StubRoutines::_sha256_implCompressMB = generate_SHA256_stub(true,  "SHA256_multiBlock");
    }
    if (UseSHA512Intrinsics) {
      StubRoutines::_sha512_implCompress   = generate_SHA512_stub(false, "SHA512_singleBlock");
      StubRoutines::_sha512_implCompressMB = generate_SHA512_stub(true,  "SHA512_multiBlock");
    }

#ifdef COMPILER2
    if (UseMultiplyToLenIntrinsic) {
      StubRoutines::_multiplyToLen = generate_multiplyToLen();
    }
    if (UseMontgomeryMultiplyIntrinsic) {
      StubRoutines::_montgomeryMultiply
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
    }
    if (UseMontgomerySquareIntrinsic) {
      StubRoutines::_montgomerySquare
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
    }
#endif
  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    // Replace the standard masm with a special one:
    _masm = new MacroAssembler(code);

    _stub_count = !all ? 0x100 : 0x200;
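    // Assumption: the different base values (0x100 vs. 0x200) let the counter
    // emitted by stub_prolog() below identify the generation phase as well.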
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }
  }

 private:
  int _stub_count;
  void stub_prolog(StubCodeDesc* cdesc) {
#ifdef ASSERT
    // Put extra information in the stub code, to make it more readable.
    // Write the high part of the address.
    // [RGV] Check if there is a dependency on the size of this prolog.
    __ emit_32((intptr_t)cdesc >> 32);  // High 32 bits of the StubCodeDesc address.
    __ emit_32((intptr_t)cdesc);        // Low 32 bits.
    __ emit_32(++_stub_count);          // Stub sequence number.
#endif
    align(true);
  }

  void align(bool at_header = false) {
    // z/Architecture cache line size is 256 bytes.
    // There is no obvious benefit in aligning stub
    // code to cache lines. Use CodeEntryAlignment instead.
    const unsigned int icache_line_size      = CodeEntryAlignment;
    const unsigned int icache_half_line_size = MIN2<unsigned int>(32, CodeEntryAlignment);

    if (at_header) {
      while ((intptr_t)(__ pc()) % icache_line_size != 0) {
        __ emit_16(0);
      }
    } else {
      while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
        __ z_nop();
      }
    }
  }

};

void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator g(code, all);
}
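
// Note: this is expected to be called twice during startup (presumably from
// StubRoutines::initialize1() and initialize2() in stubRoutines.cpp), first
// with all == false for the initial stubs, then with all == true for the rest.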