author | shade |
Thu, 04 Feb 2016 21:44:23 +0300 | |
changeset 35708 | 290a3952e434 |
parent 35127 | 483603d4c7b2 |
child 36562 | 4d1e93624d6a |
permissions | -rw-r--r-- |
29183 | 1 |
/* |
2 |
* Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. |
|
30225
e9722ea461d4
8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents:
29183
diff
changeset
|
3 |
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. |
29183 | 4 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
5 |
* |
|
6 |
* This code is free software; you can redistribute it and/or modify it |
|
7 |
* under the terms of the GNU General Public License version 2 only, as |
|
8 |
* published by the Free Software Foundation. |
|
9 |
* |
|
10 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
11 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
12 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
13 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
14 |
* accompanied this code). |
|
15 |
* |
|
16 |
* You should have received a copy of the GNU General Public License version |
|
17 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
18 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
19 |
* |
|
20 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
21 |
* or visit www.oracle.com if you need additional information or have any |
|
22 |
* questions. |
|
23 |
* |
|
24 |
*/ |
|
25 |
||
26 |
#ifndef CPU_AARCH64_VM_ASSEMBLER_AARCH64_HPP |
|
27 |
#define CPU_AARCH64_VM_ASSEMBLER_AARCH64_HPP |
|
28 |
||
29 |
#include "asm/register.hpp" |
|
30 |
||
31 |
// definitions of various symbolic names for machine registers |
|
32 |
||
33 |
// First intercalls between C and Java which use 8 general registers |
|
34 |
// and 8 floating registers |
|
35 |
||
36 |
// we also have to copy between x86 and ARM registers but that's a |
|
37 |
// secondary complication -- not all code employing C call convention |
|
38 |
// executes as x86 code though -- we generate some of it |
|
39 |
||
40 |
// Counts of arguments that are passed in registers, for the C and the Java
// calling conventions respectively.
class Argument VALUE_OBJ_CLASS_SPEC {
 public:
  enum {
    // C calling convention
    n_int_register_parameters_c   = 8,  // r0, r1, ... r7 (c_rarg0, c_rarg1, ...)
    n_float_register_parameters_c = 8,  // v0, v1, ... v7 (c_farg0, c_farg1, ...)

    // Java calling convention
    n_int_register_parameters_j   = 8,  // r1, ... r7, r0 (j_rarg0, j_rarg1, ...)
    n_float_register_parameters_j = 8   // v0, v1, ... v7 (j_farg0, j_farg1, ...)
  };
};
|
50 |
||
51 |
// Integer argument registers of the C calling convention: c_rargN is rN.
REGISTER_DECLARATION(Register, c_rarg0, r0);
REGISTER_DECLARATION(Register, c_rarg1, r1);
REGISTER_DECLARATION(Register, c_rarg2, r2);
REGISTER_DECLARATION(Register, c_rarg3, r3);
REGISTER_DECLARATION(Register, c_rarg4, r4);
REGISTER_DECLARATION(Register, c_rarg5, r5);
REGISTER_DECLARATION(Register, c_rarg6, r6);
REGISTER_DECLARATION(Register, c_rarg7, r7);

// Floating-point argument registers of the C calling convention: c_fargN is vN.
REGISTER_DECLARATION(FloatRegister, c_farg0, v0);
REGISTER_DECLARATION(FloatRegister, c_farg1, v1);
REGISTER_DECLARATION(FloatRegister, c_farg2, v2);
REGISTER_DECLARATION(FloatRegister, c_farg3, v3);
REGISTER_DECLARATION(FloatRegister, c_farg4, v4);
REGISTER_DECLARATION(FloatRegister, c_farg5, v5);
REGISTER_DECLARATION(FloatRegister, c_farg6, v6);
REGISTER_DECLARATION(FloatRegister, c_farg7, v7);
|
68 |
||
69 |
// Symbolically name the register arguments used by the Java calling convention. |
|
70 |
// We have control over the convention for java so we can do what we please. |
|
71 |
// What pleases us is to offset the java calling convention so that when |
|
72 |
// we call a suitable jni method the arguments are lined up and we don't |
|
73 |
// have to do much shuffling. A suitable jni method is non-static and a |
|
74 |
// small number of arguments |
|
75 |
// |
|
76 |
// |--------------------------------------------------------------------| |
|
77 |
// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 | |
|
78 |
// |--------------------------------------------------------------------| |
|
79 |
// | r0 r1 r2 r3 r4 r5 r6 r7 | |
|
80 |
// |--------------------------------------------------------------------| |
|
81 |
// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 | |
|
82 |
// |--------------------------------------------------------------------| |
|
83 |
||
84 |
||
85 |
REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); |
|
86 |
REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); |
|
87 |
REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); |
|
88 |
REGISTER_DECLARATION(Register, j_rarg3, c_rarg4); |
|
89 |
REGISTER_DECLARATION(Register, j_rarg4, c_rarg5); |
|
90 |
REGISTER_DECLARATION(Register, j_rarg5, c_rarg6); |
|
91 |
REGISTER_DECLARATION(Register, j_rarg6, c_rarg7); |
|
92 |
REGISTER_DECLARATION(Register, j_rarg7, c_rarg0); |
|
93 |
||
94 |
// Java floating args are passed as per C |
|
95 |
||
96 |
REGISTER_DECLARATION(FloatRegister, j_farg0, v0); |
|
97 |
REGISTER_DECLARATION(FloatRegister, j_farg1, v1); |
|
98 |
REGISTER_DECLARATION(FloatRegister, j_farg2, v2); |
|
99 |
REGISTER_DECLARATION(FloatRegister, j_farg3, v3); |
|
100 |
REGISTER_DECLARATION(FloatRegister, j_farg4, v4); |
|
101 |
REGISTER_DECLARATION(FloatRegister, j_farg5, v5); |
|
102 |
REGISTER_DECLARATION(FloatRegister, j_farg6, v6); |
|
103 |
REGISTER_DECLARATION(FloatRegister, j_farg7, v7); |
|
104 |
||
105 |
// registers used to hold VM data either temporarily within a method |
|
106 |
// or across method calls |
|
107 |
||
108 |
// volatile (caller-save) registers |
|
109 |
||
110 |
// r8 is used for indirect result location return |
|
111 |
// we use it and r9 as scratch registers |
|
112 |
REGISTER_DECLARATION(Register, rscratch1, r8); |
|
113 |
REGISTER_DECLARATION(Register, rscratch2, r9); |
|
114 |
||
115 |
// current method -- must be in a call-clobbered register |
|
116 |
REGISTER_DECLARATION(Register, rmethod, r12); |
|
117 |
||
118 |
// non-volatile (callee-save) registers are r16-29 |
|
119 |
// of which the following are dedicated global state |
|
120 |
||
121 |
// link register |
|
122 |
REGISTER_DECLARATION(Register, lr, r30); |
|
123 |
// frame pointer |
|
124 |
REGISTER_DECLARATION(Register, rfp, r29); |
|
125 |
// current thread |
|
126 |
REGISTER_DECLARATION(Register, rthread, r28); |
|
127 |
// base of heap |
|
128 |
REGISTER_DECLARATION(Register, rheapbase, r27); |
|
129 |
// constant pool cache |
|
130 |
REGISTER_DECLARATION(Register, rcpool, r26); |
|
131 |
// monitors allocated on stack |
|
132 |
REGISTER_DECLARATION(Register, rmonitors, r25); |
|
133 |
// locals on stack |
|
134 |
REGISTER_DECLARATION(Register, rlocals, r24); |
|
135 |
// bytecode pointer |
|
136 |
REGISTER_DECLARATION(Register, rbcp, r22); |
|
137 |
// Dispatch table base |
|
35127 | 138 |
REGISTER_DECLARATION(Register, rdispatch, r21); |
29183 | 139 |
// Java stack pointer |
140 |
REGISTER_DECLARATION(Register, esp, r20); |
|
141 |
||
142 |
// assert() whose failure message is the text of the asserted condition itself.
#define assert_cond(cond) assert(cond, #cond)

// Assembler helpers that are defined outside the Assembler class.
namespace asm_util {
  // Returns the encoded form of imm for a logical (immediate) instruction;
  // is32 selects the 32-bit variant. Defined out of line.
  uint32_t encode_logical_immediate(bool is32, uint64_t imm);
};

// Makes the helpers above visible unqualified to the code in this header.
using namespace asm_util;


// Forward declaration: Instruction_aarch64 keeps a pointer to its Assembler.
class Assembler;
|
152 |
||
153 |
class Instruction_aarch64 { |
|
154 |
unsigned insn; |
|
155 |
#ifdef ASSERT |
|
156 |
unsigned bits; |
|
157 |
#endif |
|
158 |
Assembler *assem; |
|
159 |
||
160 |
public: |
|
161 |
||
162 |
Instruction_aarch64(class Assembler *as) { |
|
163 |
#ifdef ASSERT |
|
164 |
bits = 0; |
|
165 |
#endif |
|
166 |
insn = 0; |
|
167 |
assem = as; |
|
168 |
} |
|
169 |
||
170 |
inline ~Instruction_aarch64(); |
|
171 |
||
172 |
unsigned &get_insn() { return insn; } |
|
173 |
#ifdef ASSERT |
|
174 |
unsigned &get_bits() { return bits; } |
|
175 |
#endif |
|
176 |
||
177 |
static inline int32_t extend(unsigned val, int hi = 31, int lo = 0) { |
|
178 |
union { |
|
179 |
unsigned u; |
|
180 |
int n; |
|
181 |
}; |
|
182 |
||
183 |
u = val << (31 - hi); |
|
184 |
n = n >> (31 - hi + lo); |
|
185 |
return n; |
|
186 |
} |
|
187 |
||
188 |
static inline uint32_t extract(uint32_t val, int msb, int lsb) { |
|
189 |
int nbits = msb - lsb + 1; |
|
190 |
assert_cond(msb >= lsb); |
|
191 |
uint32_t mask = (1U << nbits) - 1; |
|
192 |
uint32_t result = val >> lsb; |
|
193 |
result &= mask; |
|
194 |
return result; |
|
195 |
} |
|
196 |
||
197 |
static inline int32_t sextract(uint32_t val, int msb, int lsb) { |
|
198 |
uint32_t uval = extract(val, msb, lsb); |
|
199 |
return extend(uval, msb - lsb); |
|
200 |
} |
|
201 |
||
202 |
static void patch(address a, int msb, int lsb, unsigned long val) { |
|
203 |
int nbits = msb - lsb + 1; |
|
204 |
guarantee(val < (1U << nbits), "Field too big for insn"); |
|
205 |
assert_cond(msb >= lsb); |
|
206 |
unsigned mask = (1U << nbits) - 1; |
|
207 |
val <<= lsb; |
|
208 |
mask <<= lsb; |
|
209 |
unsigned target = *(unsigned *)a; |
|
210 |
target &= ~mask; |
|
211 |
target |= val; |
|
212 |
*(unsigned *)a = target; |
|
213 |
} |
|
214 |
||
215 |
static void spatch(address a, int msb, int lsb, long val) { |
|
216 |
int nbits = msb - lsb + 1; |
|
217 |
long chk = val >> (nbits - 1); |
|
218 |
guarantee (chk == -1 || chk == 0, "Field too big for insn"); |
|
219 |
unsigned uval = val; |
|
220 |
unsigned mask = (1U << nbits) - 1; |
|
221 |
uval &= mask; |
|
222 |
uval <<= lsb; |
|
223 |
mask <<= lsb; |
|
224 |
unsigned target = *(unsigned *)a; |
|
225 |
target &= ~mask; |
|
226 |
target |= uval; |
|
227 |
*(unsigned *)a = target; |
|
228 |
} |
|
229 |
||
230 |
void f(unsigned val, int msb, int lsb) { |
|
231 |
int nbits = msb - lsb + 1; |
|
232 |
guarantee(val < (1U << nbits), "Field too big for insn"); |
|
233 |
assert_cond(msb >= lsb); |
|
234 |
unsigned mask = (1U << nbits) - 1; |
|
235 |
val <<= lsb; |
|
236 |
mask <<= lsb; |
|
237 |
insn |= val; |
|
238 |
assert_cond((bits & mask) == 0); |
|
239 |
#ifdef ASSERT |
|
240 |
bits |= mask; |
|
241 |
#endif |
|
242 |
} |
|
243 |
||
244 |
void f(unsigned val, int bit) { |
|
245 |
f(val, bit, bit); |
|
246 |
} |
|
247 |
||
248 |
void sf(long val, int msb, int lsb) { |
|
249 |
int nbits = msb - lsb + 1; |
|
250 |
long chk = val >> (nbits - 1); |
|
251 |
guarantee (chk == -1 || chk == 0, "Field too big for insn"); |
|
252 |
unsigned uval = val; |
|
253 |
unsigned mask = (1U << nbits) - 1; |
|
254 |
uval &= mask; |
|
255 |
f(uval, lsb + nbits - 1, lsb); |
|
256 |
} |
|
257 |
||
258 |
void rf(Register r, int lsb) { |
|
259 |
f(r->encoding_nocheck(), lsb + 4, lsb); |
|
260 |
} |
|
261 |
||
262 |
// reg|ZR |
|
263 |
void zrf(Register r, int lsb) { |
|
264 |
f(r->encoding_nocheck() - (r == zr), lsb + 4, lsb); |
|
265 |
} |
|
266 |
||
267 |
// reg|SP |
|
268 |
void srf(Register r, int lsb) { |
|
269 |
f(r == sp ? 31 : r->encoding_nocheck(), lsb + 4, lsb); |
|
270 |
} |
|
271 |
||
272 |
void rf(FloatRegister r, int lsb) { |
|
273 |
f(r->encoding_nocheck(), lsb + 4, lsb); |
|
274 |
} |
|
275 |
||
276 |
unsigned get(int msb = 31, int lsb = 0) { |
|
277 |
int nbits = msb - lsb + 1; |
|
278 |
unsigned mask = ((1U << nbits) - 1) << lsb; |
|
279 |
assert_cond(bits & mask == mask); |
|
280 |
return (insn & mask) >> lsb; |
|
281 |
} |
|
282 |
||
283 |
void fixed(unsigned value, unsigned mask) { |
|
284 |
assert_cond ((mask & bits) == 0); |
|
285 |
#ifdef ASSERT |
|
286 |
bits |= mask; |
|
287 |
#endif |
|
288 |
insn |= value; |
|
289 |
} |
|
290 |
}; |
|
291 |
||
292 |
#define starti Instruction_aarch64 do_not_use(this); set_current(&do_not_use) |
|
293 |
||
294 |
class PrePost { |
|
295 |
int _offset; |
|
296 |
Register _r; |
|
297 |
public: |
|
298 |
PrePost(Register reg, int o) : _r(reg), _offset(o) { } |
|
299 |
int offset() { return _offset; } |
|
300 |
Register reg() { return _r; } |
|
301 |
}; |
|
302 |
||
303 |
class Pre : public PrePost { |
|
304 |
public: |
|
305 |
Pre(Register reg, int o) : PrePost(reg, o) { } |
|
306 |
}; |
|
307 |
class Post : public PrePost { |
|
308 |
public: |
|
309 |
Post(Register reg, int o) : PrePost(reg, o) { } |
|
310 |
}; |
|
311 |
||
312 |
// Extend operations, numbered 0..7 in declaration order.
namespace ext
{
  enum operation {
    uxtb,   // 0
    uxth,   // 1
    uxtw,   // 2
    uxtx,   // 3
    sxtb,   // 4
    sxth,   // 5
    sxtw,   // 6
    sxtx    // 7
  };
};
|
316 |
||
317 |
// abs methods which cannot overflow and so are well-defined across
// the entire domain of integer types.
//
// The argument is treated as a two's-complement signed number: it is
// "negative" exactly when its top bit is set, in which case it is negated
// in unsigned arithmetic (which wraps and therefore cannot overflow). The
// most negative value maps to its own magnitude, e.g. uabs(INT_MIN) is 2^31.
// Everything is done with unsigned operations, so no union type punning is
// required.
static inline unsigned int uabs(unsigned int n) {
  const bool negative = (n >> (sizeof(n) * 8 - 1)) != 0;
  return negative ? 0U - n : n;
}
static inline unsigned long uabs(unsigned long n) {
  const bool negative = (n >> (sizeof(n) * 8 - 1)) != 0;
  return negative ? 0UL - n : n;
}
// Signed overloads: reinterpret as unsigned and defer to the versions above.
static inline unsigned long uabs(long n) { return uabs((unsigned long)n); }
static inline unsigned long uabs(int n) { return uabs((unsigned int)n); }
|
339 |
||
340 |
// Addressing modes |
|
341 |
class Address VALUE_OBJ_CLASS_SPEC { |
|
342 |
public: |
|
343 |
||
344 |
enum mode { no_mode, base_plus_offset, pre, post, pcrel, |
|
345 |
base_plus_offset_reg, literal }; |
|
346 |
||
347 |
// Shift and extend for base reg + reg offset addressing |
|
348 |
class extend { |
|
349 |
int _option, _shift; |
|
350 |
ext::operation _op; |
|
351 |
public: |
|
352 |
extend() { } |
|
353 |
extend(int s, int o, ext::operation op) : _shift(s), _option(o), _op(op) { } |
|
354 |
int option() const{ return _option; } |
|
355 |
int shift() const { return _shift; } |
|
356 |
ext::operation op() const { return _op; } |
|
357 |
}; |
|
358 |
class uxtw : public extend { |
|
359 |
public: |
|
360 |
uxtw(int shift = -1): extend(shift, 0b010, ext::uxtw) { } |
|
361 |
}; |
|
362 |
class lsl : public extend { |
|
363 |
public: |
|
364 |
lsl(int shift = -1): extend(shift, 0b011, ext::uxtx) { } |
|
365 |
}; |
|
366 |
class sxtw : public extend { |
|
367 |
public: |
|
368 |
sxtw(int shift = -1): extend(shift, 0b110, ext::sxtw) { } |
|
369 |
}; |
|
370 |
class sxtx : public extend { |
|
371 |
public: |
|
372 |
sxtx(int shift = -1): extend(shift, 0b111, ext::sxtx) { } |
|
373 |
}; |
|
374 |
||
375 |
private: |
|
376 |
Register _base; |
|
377 |
Register _index; |
|
378 |
long _offset; |
|
379 |
enum mode _mode; |
|
380 |
extend _ext; |
|
381 |
||
382 |
RelocationHolder _rspec; |
|
383 |
||
384 |
// Typically we use AddressLiterals we want to use their rval |
|
385 |
// However in some situations we want the lval (effect address) of |
|
386 |
// the item. We provide a special factory for making those lvals. |
|
387 |
bool _is_lval; |
|
388 |
||
389 |
// If the target is far we'll need to load the ea of this to a |
|
390 |
// register to reach it. Otherwise if near we can do PC-relative |
|
391 |
// addressing. |
|
392 |
address _target; |
|
393 |
||
394 |
public: |
|
395 |
Address() |
|
396 |
: _mode(no_mode) { } |
|
397 |
Address(Register r) |
|
398 |
: _mode(base_plus_offset), _base(r), _offset(0), _index(noreg), _target(0) { } |
|
399 |
Address(Register r, int o) |
|
400 |
: _mode(base_plus_offset), _base(r), _offset(o), _index(noreg), _target(0) { } |
|
401 |
Address(Register r, long o) |
|
402 |
: _mode(base_plus_offset), _base(r), _offset(o), _index(noreg), _target(0) { } |
|
403 |
Address(Register r, unsigned long o) |
|
404 |
: _mode(base_plus_offset), _base(r), _offset(o), _index(noreg), _target(0) { } |
|
405 |
#ifdef ASSERT |
|
406 |
Address(Register r, ByteSize disp) |
|
407 |
: _mode(base_plus_offset), _base(r), _offset(in_bytes(disp)), |
|
408 |
_index(noreg), _target(0) { } |
|
409 |
#endif |
|
410 |
Address(Register r, Register r1, extend ext = lsl()) |
|
411 |
: _mode(base_plus_offset_reg), _base(r), _index(r1), |
|
412 |
_ext(ext), _offset(0), _target(0) { } |
|
413 |
Address(Pre p) |
|
414 |
: _mode(pre), _base(p.reg()), _offset(p.offset()) { } |
|
415 |
Address(Post p) |
|
416 |
: _mode(post), _base(p.reg()), _offset(p.offset()), _target(0) { } |
|
417 |
Address(address target, RelocationHolder const& rspec) |
|
418 |
: _mode(literal), |
|
419 |
_rspec(rspec), |
|
420 |
_is_lval(false), |
|
421 |
_target(target) { } |
|
422 |
Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type); |
|
423 |
Address(Register base, RegisterOrConstant index, extend ext = lsl()) |
|
424 |
: _base (base), |
|
425 |
_ext(ext), _offset(0), _target(0) { |
|
426 |
if (index.is_register()) { |
|
427 |
_mode = base_plus_offset_reg; |
|
428 |
_index = index.as_register(); |
|
429 |
} else { |
|
430 |
guarantee(ext.option() == ext::uxtx, "should be"); |
|
431 |
assert(index.is_constant(), "should be"); |
|
432 |
_mode = base_plus_offset; |
|
433 |
_offset = index.as_constant() << ext.shift(); |
|
434 |
} |
|
435 |
} |
|
436 |
||
437 |
Register base() const { |
|
438 |
guarantee((_mode == base_plus_offset | _mode == base_plus_offset_reg |
|
439 |
| _mode == post), |
|
440 |
"wrong mode"); |
|
441 |
return _base; |
|
442 |
} |
|
443 |
long offset() const { |
|
444 |
return _offset; |
|
445 |
} |
|
446 |
Register index() const { |
|
447 |
return _index; |
|
448 |
} |
|
449 |
mode getMode() const { |
|
450 |
return _mode; |
|
451 |
} |
|
452 |
bool uses(Register reg) const { return _base == reg || _index == reg; } |
|
453 |
address target() const { return _target; } |
|
454 |
const RelocationHolder& rspec() const { return _rspec; } |
|
455 |
||
456 |
void encode(Instruction_aarch64 *i) const { |
|
457 |
i->f(0b111, 29, 27); |
|
458 |
i->srf(_base, 5); |
|
459 |
||
460 |
switch(_mode) { |
|
461 |
case base_plus_offset: |
|
462 |
{ |
|
463 |
unsigned size = i->get(31, 30); |
|
30890
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
464 |
if (i->get(26, 26) && i->get(23, 23)) { |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
465 |
// SIMD Q Type - Size = 128 bits |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
466 |
assert(size == 0, "bad size"); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
467 |
size = 0b100; |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
468 |
} |
29183 | 469 |
unsigned mask = (1 << size) - 1; |
470 |
if (_offset < 0 || _offset & mask) |
|
471 |
{ |
|
472 |
i->f(0b00, 25, 24); |
|
473 |
i->f(0, 21), i->f(0b00, 11, 10); |
|
474 |
i->sf(_offset, 20, 12); |
|
475 |
} else { |
|
476 |
i->f(0b01, 25, 24); |
|
477 |
i->f(_offset >> size, 21, 10); |
|
478 |
} |
|
479 |
} |
|
480 |
break; |
|
481 |
||
482 |
case base_plus_offset_reg: |
|
483 |
{ |
|
484 |
i->f(0b00, 25, 24); |
|
485 |
i->f(1, 21); |
|
486 |
i->rf(_index, 16); |
|
487 |
i->f(_ext.option(), 15, 13); |
|
488 |
unsigned size = i->get(31, 30); |
|
31227
964d24a82077
8129551: aarch64: some regressions introduced by addition of vectorisation code
enevill
parents:
30890
diff
changeset
|
489 |
if (i->get(26, 26) && i->get(23, 23)) { |
964d24a82077
8129551: aarch64: some regressions introduced by addition of vectorisation code
enevill
parents:
30890
diff
changeset
|
490 |
// SIMD Q Type - Size = 128 bits |
964d24a82077
8129551: aarch64: some regressions introduced by addition of vectorisation code
enevill
parents:
30890
diff
changeset
|
491 |
assert(size == 0, "bad size"); |
964d24a82077
8129551: aarch64: some regressions introduced by addition of vectorisation code
enevill
parents:
30890
diff
changeset
|
492 |
size = 0b100; |
964d24a82077
8129551: aarch64: some regressions introduced by addition of vectorisation code
enevill
parents:
30890
diff
changeset
|
493 |
} |
29183 | 494 |
if (size == 0) // It's a byte |
495 |
i->f(_ext.shift() >= 0, 12); |
|
496 |
else { |
|
497 |
if (_ext.shift() > 0) |
|
498 |
assert(_ext.shift() == (int)size, "bad shift"); |
|
499 |
i->f(_ext.shift() > 0, 12); |
|
500 |
} |
|
501 |
i->f(0b10, 11, 10); |
|
502 |
} |
|
503 |
break; |
|
504 |
||
505 |
case pre: |
|
506 |
i->f(0b00, 25, 24); |
|
507 |
i->f(0, 21), i->f(0b11, 11, 10); |
|
508 |
i->sf(_offset, 20, 12); |
|
509 |
break; |
|
510 |
||
511 |
case post: |
|
512 |
i->f(0b00, 25, 24); |
|
513 |
i->f(0, 21), i->f(0b01, 11, 10); |
|
514 |
i->sf(_offset, 20, 12); |
|
515 |
break; |
|
516 |
||
517 |
default: |
|
518 |
ShouldNotReachHere(); |
|
519 |
} |
|
520 |
} |
|
521 |
||
522 |
void encode_pair(Instruction_aarch64 *i) const { |
|
523 |
switch(_mode) { |
|
524 |
case base_plus_offset: |
|
525 |
i->f(0b010, 25, 23); |
|
526 |
break; |
|
527 |
case pre: |
|
528 |
i->f(0b011, 25, 23); |
|
529 |
break; |
|
530 |
case post: |
|
531 |
i->f(0b001, 25, 23); |
|
532 |
break; |
|
533 |
default: |
|
534 |
ShouldNotReachHere(); |
|
535 |
} |
|
536 |
||
537 |
unsigned size; // Operand shift in 32-bit words |
|
538 |
||
539 |
if (i->get(26, 26)) { // float |
|
540 |
switch(i->get(31, 30)) { |
|
541 |
case 0b10: |
|
542 |
size = 2; break; |
|
543 |
case 0b01: |
|
544 |
size = 1; break; |
|
545 |
case 0b00: |
|
546 |
size = 0; break; |
|
547 |
default: |
|
548 |
ShouldNotReachHere(); |
|
35127 | 549 |
size = 0; // unreachable |
29183 | 550 |
} |
551 |
} else { |
|
552 |
size = i->get(31, 31); |
|
553 |
} |
|
554 |
||
555 |
size = 4 << size; |
|
556 |
guarantee(_offset % size == 0, "bad offset"); |
|
557 |
i->sf(_offset / size, 21, 15); |
|
558 |
i->srf(_base, 5); |
|
559 |
} |
|
560 |
||
561 |
void encode_nontemporal_pair(Instruction_aarch64 *i) const { |
|
562 |
// Only base + offset is allowed |
|
563 |
i->f(0b000, 25, 23); |
|
564 |
unsigned size = i->get(31, 31); |
|
565 |
size = 4 << size; |
|
566 |
guarantee(_offset % size == 0, "bad offset"); |
|
567 |
i->sf(_offset / size, 21, 15); |
|
568 |
i->srf(_base, 5); |
|
569 |
guarantee(_mode == Address::base_plus_offset, |
|
570 |
"Bad addressing mode for non-temporal op"); |
|
571 |
} |
|
572 |
||
573 |
void lea(MacroAssembler *, Register) const; |
|
574 |
||
575 |
static bool offset_ok_for_immed(long offset, int shift = 0) { |
|
576 |
unsigned mask = (1 << shift) - 1; |
|
577 |
if (offset < 0 || offset & mask) { |
|
578 |
return (uabs(offset) < (1 << (20 - 12))); // Unscaled offset |
|
579 |
} else { |
|
580 |
return ((offset >> shift) < (1 << (21 - 10 + 1))); // Scaled, unsigned offset |
|
581 |
} |
|
582 |
} |
|
583 |
}; |
|
584 |
||
585 |
// Convenience classes |
|
586 |
class RuntimeAddress: public Address { |
|
587 |
||
588 |
public: |
|
589 |
||
590 |
RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {} |
|
591 |
||
592 |
}; |
|
593 |
||
594 |
class OopAddress: public Address { |
|
595 |
||
596 |
public: |
|
597 |
||
598 |
OopAddress(address target) : Address(target, relocInfo::oop_type){} |
|
599 |
||
600 |
}; |
|
601 |
||
602 |
class ExternalAddress: public Address { |
|
603 |
private: |
|
604 |
static relocInfo::relocType reloc_for_target(address target) { |
|
605 |
// Sometimes ExternalAddress is used for values which aren't |
|
606 |
// exactly addresses, like the card table base. |
|
607 |
// external_word_type can't be used for values in the first page |
|
608 |
// so just skip the reloc in that case. |
|
609 |
return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none; |
|
610 |
} |
|
611 |
||
612 |
public: |
|
613 |
||
614 |
ExternalAddress(address target) : Address(target, reloc_for_target(target)) {} |
|
615 |
||
616 |
}; |
|
617 |
||
618 |
class InternalAddress: public Address { |
|
619 |
||
620 |
public: |
|
621 |
||
622 |
InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {} |
|
623 |
}; |
|
624 |
||
625 |
const int FPUStateSizeInWords = 32 * 2;

// Prefetch operation codes. Each family (PLD, PST, PLI) occupies six
// consecutive values starting at its own base.
typedef enum {
  PLDL1KEEP = 0b00000,
  PLDL1STRM = 0b00001,
  PLDL2KEEP = 0b00010,
  PLDL2STRM = 0b00011,
  PLDL3KEEP = 0b00100,
  PLDL3STRM = 0b00101,

  PSTL1KEEP = 0b10000,
  PSTL1STRM = 0b10001,
  PSTL2KEEP = 0b10010,
  PSTL2STRM = 0b10011,
  PSTL3KEEP = 0b10100,
  PSTL3STRM = 0b10101,

  PLIL1KEEP = 0b01000,
  PLIL1STRM = 0b01001,
  PLIL2KEEP = 0b01010,
  PLIL2STRM = 0b01011,
  PLIL3KEEP = 0b01100,
  PLIL3STRM = 0b01101
} prfop;
|
631 |
||
632 |
class Assembler : public AbstractAssembler { |
|
633 |
||
634 |
#ifndef PRODUCT |
|
635 |
static const unsigned long asm_bp; |
|
636 |
||
637 |
void emit_long(jint x) { |
|
638 |
if ((unsigned long)pc() == asm_bp) |
|
639 |
asm volatile ("nop"); |
|
640 |
AbstractAssembler::emit_int32(x); |
|
641 |
} |
|
642 |
#else |
|
643 |
void emit_long(jint x) { |
|
644 |
AbstractAssembler::emit_int32(x); |
|
645 |
} |
|
646 |
#endif |
|
647 |
||
648 |
public: |
|
649 |
||
650 |
enum { instruction_size = 4 }; |
|
651 |
||
652 |
// Builds a pre-indexed (preIncrement == true) or post-indexed address from
// base and offset.
Address adjust(Register base, int offset, bool preIncrement) {
  return preIncrement ? Address(Pre(base, offset))
                      : Address(Post(base, offset));
}
|
658 |
||
659 |
// Pre-indexed address built from base and offset.
Address pre(Register base, int offset) {
  const bool pre_increment = true;
  return adjust(base, offset, pre_increment);
}
|
662 |
||
663 |
// Post-indexed address built from base and offset.
Address post (Register base, int offset) {
  const bool pre_increment = false;
  return adjust(base, offset, pre_increment);
}
|
666 |
||
667 |
Instruction_aarch64* current; |
|
668 |
||
669 |
void set_current(Instruction_aarch64* i) { current = i; } |
|
670 |
||
671 |
void f(unsigned val, int msb, int lsb) { |
|
672 |
current->f(val, msb, lsb); |
|
673 |
} |
|
674 |
void f(unsigned val, int msb) { |
|
675 |
current->f(val, msb, msb); |
|
676 |
} |
|
677 |
void sf(long val, int msb, int lsb) { |
|
678 |
current->sf(val, msb, lsb); |
|
679 |
} |
|
680 |
void rf(Register reg, int lsb) { |
|
681 |
current->rf(reg, lsb); |
|
682 |
} |
|
683 |
void srf(Register reg, int lsb) { |
|
684 |
current->srf(reg, lsb); |
|
685 |
} |
|
686 |
void zrf(Register reg, int lsb) { |
|
687 |
current->zrf(reg, lsb); |
|
688 |
} |
|
689 |
void rf(FloatRegister reg, int lsb) { |
|
690 |
current->rf(reg, lsb); |
|
691 |
} |
|
692 |
void fixed(unsigned value, unsigned mask) { |
|
693 |
current->fixed(value, mask); |
|
694 |
} |
|
695 |
||
696 |
void emit() { |
|
697 |
emit_long(current->get_insn()); |
|
698 |
assert_cond(current->get_bits() == 0xffffffff); |
|
699 |
current = NULL; |
|
700 |
} |
|
701 |
||
702 |
typedef void (Assembler::* uncond_branch_insn)(address dest); |
|
703 |
typedef void (Assembler::* compare_and_branch_insn)(Register Rt, address dest); |
|
704 |
typedef void (Assembler::* test_and_branch_insn)(Register Rt, int bitpos, address dest); |
|
705 |
typedef void (Assembler::* prefetch_insn)(address target, prfop); |
|
706 |
||
707 |
void wrap_label(Label &L, uncond_branch_insn insn); |
|
708 |
void wrap_label(Register r, Label &L, compare_and_branch_insn insn); |
|
709 |
void wrap_label(Register r, int bitpos, Label &L, test_and_branch_insn insn); |
|
710 |
void wrap_label(Label &L, prfop, prefetch_insn insn); |
|
711 |
||
712 |
// PC-rel. addressing |
|
713 |
||
714 |
void adr(Register Rd, address dest); |
|
715 |
void _adrp(Register Rd, address dest); |
|
716 |
||
717 |
void adr(Register Rd, const Address &dest); |
|
718 |
void _adrp(Register Rd, const Address &dest); |
|
719 |
||
720 |
// adr to a label: hands the address form of adr to wrap_label together
// with L.
void adr(Register Rd, Label &L) {
  compare_and_branch_insn address_form = &Assembler::adr;
  wrap_label(Rd, L, address_form);
}
|
723 |
// _adrp to a label: hands the address form of _adrp to wrap_label together
// with L.
void _adrp(Register Rd, Label &L) {
  compare_and_branch_insn address_form = &Assembler::_adrp;
  wrap_label(Rd, L, address_form);
}
|
726 |
||
727 |
void adrp(Register Rd, const Address &dest, unsigned long &offset); |
|
728 |
||
729 |
#undef INSN |
|
730 |
||
731 |
void add_sub_immediate(Register Rd, Register Rn, unsigned uimm, int op, |
|
732 |
int negated_op); |
|
733 |
||
734 |
// Add/subtract (immediate) |
|
735 |
#define INSN(NAME, decode, negated) \ |
|
736 |
void NAME(Register Rd, Register Rn, unsigned imm, unsigned shift) { \ |
|
737 |
starti; \ |
|
738 |
f(decode, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10); \ |
|
739 |
zrf(Rd, 0), srf(Rn, 5); \ |
|
740 |
} \ |
|
741 |
\ |
|
742 |
void NAME(Register Rd, Register Rn, unsigned imm) { \ |
|
743 |
starti; \ |
|
744 |
add_sub_immediate(Rd, Rn, imm, decode, negated); \ |
|
745 |
} |
|
746 |
||
747 |
INSN(addsw, 0b001, 0b011); |
|
748 |
INSN(subsw, 0b011, 0b001); |
|
749 |
INSN(adds, 0b101, 0b111); |
|
750 |
INSN(subs, 0b111, 0b101); |
|
751 |
||
752 |
#undef INSN |
|
753 |
||
754 |
#define INSN(NAME, decode, negated) \ |
|
755 |
void NAME(Register Rd, Register Rn, unsigned imm) { \ |
|
756 |
starti; \ |
|
757 |
add_sub_immediate(Rd, Rn, imm, decode, negated); \ |
|
758 |
} |
|
759 |
||
760 |
INSN(addw, 0b000, 0b010); |
|
761 |
INSN(subw, 0b010, 0b000); |
|
762 |
INSN(add, 0b100, 0b110); |
|
763 |
INSN(sub, 0b110, 0b100); |
|
764 |
||
765 |
#undef INSN |
|
766 |
||
767 |
// Logical (immediate) |
|
768 |
#define INSN(NAME, decode, is32) \ |
|
769 |
void NAME(Register Rd, Register Rn, uint64_t imm) { \ |
|
770 |
starti; \ |
|
771 |
uint32_t val = encode_logical_immediate(is32, imm); \ |
|
772 |
f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10); \ |
|
773 |
srf(Rd, 0), zrf(Rn, 5); \ |
|
774 |
} |
|
775 |
||
776 |
INSN(andw, 0b000, true); |
|
777 |
INSN(orrw, 0b001, true); |
|
778 |
INSN(eorw, 0b010, true); |
|
779 |
INSN(andr, 0b100, false); |
|
780 |
INSN(orr, 0b101, false); |
|
781 |
INSN(eor, 0b110, false); |
|
782 |
||
783 |
#undef INSN |
|
784 |
||
785 |
#define INSN(NAME, decode, is32) \ |
|
786 |
void NAME(Register Rd, Register Rn, uint64_t imm) { \ |
|
787 |
starti; \ |
|
788 |
uint32_t val = encode_logical_immediate(is32, imm); \ |
|
789 |
f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10); \ |
|
790 |
zrf(Rd, 0), zrf(Rn, 5); \ |
|
791 |
} |
|
792 |
||
793 |
INSN(ands, 0b111, false); |
|
794 |
INSN(andsw, 0b011, true); |
|
795 |
||
796 |
#undef INSN |
|
797 |
||
798 |
// Move wide (immediate) |
|
799 |
#define INSN(NAME, opcode) \ |
|
800 |
void NAME(Register Rd, unsigned imm, unsigned shift = 0) { \ |
|
801 |
assert_cond((shift/16)*16 == shift); \ |
|
802 |
starti; \ |
|
803 |
f(opcode, 31, 29), f(0b100101, 28, 23), f(shift/16, 22, 21), \ |
|
804 |
f(imm, 20, 5); \ |
|
805 |
rf(Rd, 0); \ |
|
806 |
} |
|
807 |
||
808 |
INSN(movnw, 0b000); |
|
809 |
INSN(movzw, 0b010); |
|
810 |
INSN(movkw, 0b011); |
|
811 |
INSN(movn, 0b100); |
|
812 |
INSN(movz, 0b110); |
|
813 |
INSN(movk, 0b111); |
|
814 |
||
815 |
#undef INSN |
|
816 |
||
817 |
// Bitfield |
|
818 |
#define INSN(NAME, opcode) \ |
|
819 |
void NAME(Register Rd, Register Rn, unsigned immr, unsigned imms) { \ |
|
820 |
starti; \ |
|
821 |
f(opcode, 31, 22), f(immr, 21, 16), f(imms, 15, 10); \ |
|
822 |
rf(Rn, 5), rf(Rd, 0); \ |
|
823 |
} |
|
824 |
||
825 |
INSN(sbfmw, 0b0001001100); |
|
826 |
INSN(bfmw, 0b0011001100); |
|
827 |
INSN(ubfmw, 0b0101001100); |
|
828 |
INSN(sbfm, 0b1001001101); |
|
829 |
INSN(bfm, 0b1011001101); |
|
830 |
INSN(ubfm, 0b1101001101); |
|
831 |
||
832 |
#undef INSN |
|
833 |
||
834 |
// Extract |
|
835 |
#define INSN(NAME, opcode) \ |
|
836 |
void NAME(Register Rd, Register Rn, Register Rm, unsigned imms) { \ |
|
837 |
starti; \ |
|
838 |
f(opcode, 31, 21), f(imms, 15, 10); \ |
|
839 |
rf(Rm, 16), rf(Rn, 5), rf(Rd, 0); \ |
|
840 |
} |
|
841 |
||
842 |
INSN(extrw, 0b00010011100); |
|
843 |
INSN(extr, 0b10010011110); |
|
844 |
||
845 |
#undef INSN |
|
846 |
||
847 |
// The maximum range of a branch is fixed for the AArch64 |
|
848 |
// architecture. In debug mode we shrink it in order to test |
|
849 |
// trampolines, but not so small that branches in the interpreter |
|
850 |
// are out of range. |
|
851 |
static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); |
|
852 |
||
853 |
static bool reachable_from_branch_at(address branch, address target) { |
|
854 |
return uabs(target - branch) < branch_range; |
|
855 |
} |
|
856 |
||
857 |
// Unconditional branch (immediate) |
|
858 |
#define INSN(NAME, opcode) \ |
|
859 |
void NAME(address dest) { \ |
|
860 |
starti; \ |
|
861 |
long offset = (dest - pc()) >> 2; \ |
|
862 |
DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "debug only")); \ |
|
863 |
f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0); \ |
|
864 |
} \ |
|
865 |
void NAME(Label &L) { \ |
|
866 |
wrap_label(L, &Assembler::NAME); \ |
|
867 |
} \ |
|
868 |
void NAME(const Address &dest); |
|
869 |
||
870 |
INSN(b, 0); |
|
871 |
INSN(bl, 1); |
|
872 |
||
873 |
#undef INSN |
|
874 |
||
875 |
// Compare & branch (immediate) |
|
876 |
#define INSN(NAME, opcode) \ |
|
877 |
void NAME(Register Rt, address dest) { \ |
|
878 |
long offset = (dest - pc()) >> 2; \ |
|
879 |
starti; \ |
|
880 |
f(opcode, 31, 24), sf(offset, 23, 5), rf(Rt, 0); \ |
|
881 |
} \ |
|
882 |
void NAME(Register Rt, Label &L) { \ |
|
883 |
wrap_label(Rt, L, &Assembler::NAME); \ |
|
884 |
} |
|
885 |
||
886 |
INSN(cbzw, 0b00110100); |
|
887 |
INSN(cbnzw, 0b00110101); |
|
888 |
INSN(cbz, 0b10110100); |
|
889 |
INSN(cbnz, 0b10110101); |
|
890 |
||
891 |
#undef INSN |
|
892 |
||
893 |
// Test & branch (immediate) |
|
894 |
#define INSN(NAME, opcode) \ |
|
895 |
void NAME(Register Rt, int bitpos, address dest) { \ |
|
896 |
long offset = (dest - pc()) >> 2; \ |
|
897 |
int b5 = bitpos >> 5; \ |
|
898 |
bitpos &= 0x1f; \ |
|
899 |
starti; \ |
|
900 |
f(b5, 31), f(opcode, 30, 24), f(bitpos, 23, 19), sf(offset, 18, 5); \ |
|
901 |
rf(Rt, 0); \ |
|
902 |
} \ |
|
903 |
void NAME(Register Rt, int bitpos, Label &L) { \ |
|
904 |
wrap_label(Rt, bitpos, L, &Assembler::NAME); \ |
|
905 |
} |
|
906 |
||
907 |
INSN(tbz, 0b0110110); |
|
908 |
INSN(tbnz, 0b0110111); |
|
909 |
||
910 |
#undef INSN |
|
911 |
||
912 |
// Conditional branch (immediate)
// Condition codes; the numeric value is what br() stores in bits 3..0.
// CS and CC are aliases of HS and LO respectively.
enum Condition {
  EQ = 0,
  NE = 1,
  HS = 2, CS = HS,
  LO = 3, CC = LO,
  MI = 4,
  PL = 5,
  VS = 6,
  VC = 7,
  HI = 8,
  LS = 9,
  GE = 10,
  LT = 11,
  GT = 12,
  LE = 13,
  AL = 14,
  NV = 15
};
|
915 |
||
916 |
// Conditional branch to an absolute address.  The signed word offset
// (dest - pc()) >> 2 is passed to sf() for bits 23..5 and cond to f()
// for bits 3..0; the remaining fields are fixed constants.
void br(Condition cond, address dest) {
  const long word_offset = (dest - pc()) >> 2;
  starti;
  f(0b0101010, 31, 25);
  f(0, 24);
  sf(word_offset, 23, 5);
  f(0, 4);
  f(cond, 3, 0);
}
|
921 |
||
922 |
#define INSN(NAME, cond) \ |
|
923 |
void NAME(address dest) { \ |
|
924 |
br(cond, dest); \ |
|
925 |
} |
|
926 |
||
927 |
INSN(beq, EQ); |
|
928 |
INSN(bne, NE); |
|
929 |
INSN(bhs, HS); |
|
930 |
INSN(bcs, CS); |
|
931 |
INSN(blo, LO); |
|
932 |
INSN(bcc, CC); |
|
933 |
INSN(bmi, MI); |
|
934 |
INSN(bpl, PL); |
|
935 |
INSN(bvs, VS); |
|
936 |
INSN(bvc, VC); |
|
937 |
INSN(bhi, HI); |
|
938 |
INSN(bls, LS); |
|
939 |
INSN(bge, GE); |
|
940 |
INSN(blt, LT); |
|
941 |
INSN(bgt, GT); |
|
942 |
INSN(ble, LE); |
|
943 |
INSN(bal, AL); |
|
944 |
INSN(bnv, NV); |
|
945 |
||
946 |
void br(Condition cc, Label &L); |
|
947 |
||
948 |
#undef INSN |
|
949 |
||
950 |
// Exception generation |
|
951 |
void generate_exception(int opc, int op2, int LL, unsigned imm) { |
|
952 |
starti; |
|
953 |
f(0b11010100, 31, 24); |
|
954 |
f(opc, 23, 21), f(imm, 20, 5), f(op2, 4, 2), f(LL, 1, 0); |
|
955 |
} |
|
956 |
||
957 |
#define INSN(NAME, opc, op2, LL) \ |
|
958 |
void NAME(unsigned imm) { \ |
|
959 |
generate_exception(opc, op2, LL, imm); \ |
|
960 |
} |
|
961 |
||
962 |
INSN(svc, 0b000, 0, 0b01); |
|
963 |
INSN(hvc, 0b000, 0, 0b10); |
|
964 |
INSN(smc, 0b000, 0, 0b11); |
|
965 |
INSN(brk, 0b001, 0, 0b00); |
|
966 |
INSN(hlt, 0b010, 0, 0b00); |
|
967 |
INSN(dpcs1, 0b101, 0, 0b01); |
|
968 |
INSN(dpcs2, 0b101, 0, 0b10); |
|
969 |
INSN(dpcs3, 0b101, 0, 0b11); |
|
970 |
||
971 |
#undef INSN |
|
972 |
||
973 |
// System |
|
974 |
void system(int op0, int op1, int CRn, int CRm, int op2, |
|
975 |
Register rt = (Register)0b11111) |
|
976 |
{ |
|
977 |
starti; |
|
978 |
f(0b11010101000, 31, 21); |
|
979 |
f(op0, 20, 19); |
|
980 |
f(op1, 18, 16); |
|
981 |
f(CRn, 15, 12); |
|
982 |
f(CRm, 11, 8); |
|
983 |
f(op2, 7, 5); |
|
984 |
rf(rt, 0); |
|
985 |
} |
|
986 |
||
987 |
// Emits a hint through system(), with imm supplied as the CRm argument
// and every other field fixed.
void hint(int imm) {
  system(/* op0 */ 0b00, /* op1 */ 0b011, /* CRn */ 0b0010,
         /* CRm */ imm,  /* op2 */ 0b000);
}
|
990 |
||
991 |
// nop is emitted as hint number 0.
void nop() {
  const int nop_hint = 0;
  hint(nop_hint);
}
|
994 |
// we only provide mrs and msr for the special purpose system
// registers where op0 (instr[20:19]) == 11 and (currently) only
// use them for FPSR. n.b. msr has L (instr[21]) == 0, mrs has L == 1
|
997 |
||
998 |
// Write a system register from rt.  The fixed prefix 0b1101010100011
// fills 31..19; op1/CRn/CRm/op2 go to 18..16, 15..12, 11..8 and 7..5.
void msr(int op1, int CRn, int CRm, int op2, Register rt) {
  starti;
  f(0b1101010100011, 31, 19);
  f(op1, 18, 16), f(CRn, 15, 12), f(CRm, 11, 8), f(op2, 7, 5);
  // encoded with zrf rather than rf: writing zr is ok
  zrf(rt, 0);
}
|
1008 |
||
1009 |
// Read a system register into rt.  The fixed prefix 0b1101010100111
// fills 31..19; op1/CRn/CRm/op2 go to 18..16, 15..12, 11..8 and 7..5.
void mrs(int op1, int CRn, int CRm, int op2, Register rt) {
  starti;
  f(0b1101010100111, 31, 19);
  f(op1, 18, 16), f(CRn, 15, 12), f(CRm, 11, 8), f(op2, 7, 5);
  // encoded with rf rather than zrf: reading to zr is a mistake
  rf(rt, 0);
}
|
1019 |
||
1020 |
// Barrier option values; dsb()/dmb()/isb() pass these to system() as
// the CRm argument.
enum barrier {
  OSHLD = 0b0001,
  OSHST = 0b0010,
  OSH   = 0b0011,
  NSHLD = 0b0101,
  NSHST = 0b0110,
  NSH   = 0b0111,
  ISHLD = 0b1001,
  ISHST = 0b1010,
  ISH   = 0b1011,
  LD    = 0b1101,
  ST    = 0b1110,
  SY    = 0b1111
};
|
1022 |
||
1023 |
// dsb: system() call with CRn = 3, op2 = 0b100 and the barrier option
// as CRm.
void dsb(barrier imm) {
  system(/* op0 */ 0b00, /* op1 */ 0b011, /* CRn */ 0b0011,
         /* CRm */ imm,  /* op2 */ 0b100);
}
|
1026 |
||
1027 |
// dmb: system() call with CRn = 3, op2 = 0b101 and the barrier option
// as CRm.
void dmb(barrier imm) {
  system(/* op0 */ 0b00, /* op1 */ 0b011, /* CRn */ 0b0011,
         /* CRm */ imm,  /* op2 */ 0b101);
}
|
1030 |
||
1031 |
// isb: system() call with CRn = 3, op2 = 0b110 and SY as CRm.
void isb() {
  system(/* op0 */ 0b00, /* op1 */ 0b011, /* CRn */ 0b0011,
         /* CRm */ SY,   /* op2 */ 0b110);
}
|
1034 |
||
1035 |
// dc: system() call with op0 = 0b01, CRn = 0b0111, CRm = 0b1011,
// op2 = 0b001, taking its operand in Rt.
void dc(Register Rt) {
  system(/* op0 */ 0b01,   /* op1 */ 0b011, /* CRn */ 0b0111,
         /* CRm */ 0b1011, /* op2 */ 0b001, Rt);
}
|
1038 |
||
1039 |
// ic: system() call with op0 = 0b01, CRn = 0b0111, CRm = 0b0101,
// op2 = 0b001, taking its operand in Rt.
void ic(Register Rt) {
  system(/* op0 */ 0b01,   /* op1 */ 0b011, /* CRn */ 0b0111,
         /* CRm */ 0b0101, /* op2 */ 0b001, Rt);
}
|
1042 |
||
1043 |
// A more convenient access to dmb for our purposes |
|
1044 |
enum Membar_mask_bits { |
|
1045 |
// We can use ISH for a barrier because the ARM ARM says "This |
|
1046 |
// architecture assumes that all Processing Elements that use the |
|
1047 |
// same operating system or hypervisor are in the same Inner |
|
1048 |
// Shareable shareability domain." |
|
1049 |
StoreStore = ISHST, |
|
1050 |
LoadStore = ISHLD, |
|
1051 |
LoadLoad = ISHLD, |
|
1052 |
StoreLoad = ISH, |
|
1053 |
AnyAny = ISH |
|
1054 |
}; |
|
1055 |
||
1056 |
// Emits a dmb whose option is the given ordering constraint,
// reinterpreted as a barrier value.
void membar(Membar_mask_bits order_constraint) {
  const Assembler::barrier option =
      static_cast<Assembler::barrier>(order_constraint);
  dmb(option);
}
|
1059 |
||
1060 |
// Unconditional branch (register) |
|
1061 |
void branch_reg(Register R, int opc) { |
|
1062 |
starti; |
|
1063 |
f(0b1101011, 31, 25); |
|
1064 |
f(opc, 24, 21); |
|
1065 |
f(0b11111000000, 20, 10); |
|
1066 |
rf(R, 5); |
|
1067 |
f(0b00000, 4, 0); |
|
1068 |
} |
|
1069 |
||
1070 |
#define INSN(NAME, opc) \ |
|
1071 |
void NAME(Register R) { \ |
|
1072 |
branch_reg(R, opc); \ |
|
1073 |
} |
|
1074 |
||
1075 |
INSN(br, 0b0000); |
|
1076 |
INSN(blr, 0b0001); |
|
1077 |
INSN(ret, 0b0010); |
|
1078 |
||
1079 |
void ret(void *p); // This forces a compile-time error for ret(0) |
|
1080 |
||
1081 |
#undef INSN |
|
1082 |
||
1083 |
#define INSN(NAME, opc) \ |
|
1084 |
void NAME() { \ |
|
1085 |
branch_reg((Register)0b11111, opc); \ |
|
1086 |
} |
|
1087 |
||
1088 |
INSN(eret, 0b0100); |
|
1089 |
INSN(drps, 0b0101); |
|
1090 |
||
1091 |
#undef INSN |
|
1092 |
||
1093 |
// Load/store exclusive
// Operand width selector; load_store_exclusive() stores the numeric
// value in bits 31..30.
enum operand_size {
  byte     = 0,
  halfword = 1,
  word     = 2,
  xword    = 3
};
|
1095 |
||
1096 |
// Shared encoder for the exclusive / acquire-release load-store forms:
// sz -> 31..30, fixed 0b001000 -> 29..24, op -> 23..21, Rs at 16,
// o0 -> 15, Rt2 at 10, Rn at 5, Rt1 at 0.
void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
                          Register Rn, enum operand_size sz, int op, int o0) {
  starti;
  f(sz, 31, 30);
  f(0b001000, 29, 24);
  f(op, 23, 21);
  rf(Rs, 16);
  f(o0, 15);
  rf(Rt2, 10);
  rf(Rn, 5);
  rf(Rt1, 0);
}
|
1102 |
||
1103 |
#define INSN4(NAME, sz, op, o0) /* Four registers */ \ |
|
1104 |
void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) { \ |
|
32395
13b0caf18153
8133352: aarch64: generates constrained unpredictable instructions
enevill
parents:
31961
diff
changeset
|
1105 |
guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \ |
29183 | 1106 |
load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0); \ |
1107 |
} |
|
1108 |
||
1109 |
#define INSN3(NAME, sz, op, o0) /* Three registers */ \ |
|
1110 |
void NAME(Register Rs, Register Rt, Register Rn) { \ |
|
32395
13b0caf18153
8133352: aarch64: generates constrained unpredictable instructions
enevill
parents:
31961
diff
changeset
|
1111 |
guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction"); \ |
29183 | 1112 |
load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0); \ |
1113 |
} |
|
1114 |
||
1115 |
#define INSN2(NAME, sz, op, o0) /* Two registers */ \ |
|
1116 |
void NAME(Register Rt, Register Rn) { \ |
|
1117 |
load_store_exclusive((Register)0b11111, Rt, (Register)0b11111, \ |
|
1118 |
Rn, sz, op, o0); \ |
|
1119 |
} |
|
1120 |
||
1121 |
#define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \ |
|
1122 |
void NAME(Register Rt1, Register Rt2, Register Rn) { \ |
|
32395
13b0caf18153
8133352: aarch64: generates constrained unpredictable instructions
enevill
parents:
31961
diff
changeset
|
1123 |
guarantee(Rt1 != Rt2, "unpredictable instruction"); \ |
29183 | 1124 |
load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0); \ |
1125 |
} |
|
1126 |
||
1127 |
// bytes |
|
1128 |
INSN3(stxrb, byte, 0b000, 0); |
|
1129 |
INSN3(stlxrb, byte, 0b000, 1); |
|
1130 |
INSN2(ldxrb, byte, 0b010, 0); |
|
1131 |
INSN2(ldaxrb, byte, 0b010, 1); |
|
1132 |
INSN2(stlrb, byte, 0b100, 1); |
|
1133 |
INSN2(ldarb, byte, 0b110, 1); |
|
1134 |
||
1135 |
// halfwords |
|
1136 |
INSN3(stxrh, halfword, 0b000, 0); |
|
1137 |
INSN3(stlxrh, halfword, 0b000, 1); |
|
1138 |
INSN2(ldxrh, halfword, 0b010, 0); |
|
1139 |
INSN2(ldaxrh, halfword, 0b010, 1); |
|
1140 |
INSN2(stlrh, halfword, 0b100, 1); |
|
1141 |
INSN2(ldarh, halfword, 0b110, 1); |
|
1142 |
||
1143 |
// words |
|
1144 |
INSN3(stxrw, word, 0b000, 0); |
|
1145 |
INSN3(stlxrw, word, 0b000, 1); |
|
1146 |
INSN4(stxpw, word, 0b001, 0); |
|
1147 |
INSN4(stlxpw, word, 0b001, 1); |
|
1148 |
INSN2(ldxrw, word, 0b010, 0); |
|
1149 |
INSN2(ldaxrw, word, 0b010, 1); |
|
1150 |
INSN_FOO(ldxpw, word, 0b011, 0); |
|
1151 |
INSN_FOO(ldaxpw, word, 0b011, 1); |
|
1152 |
INSN2(stlrw, word, 0b100, 1); |
|
1153 |
INSN2(ldarw, word, 0b110, 1); |
|
1154 |
||
1155 |
// xwords |
|
1156 |
INSN3(stxr, xword, 0b000, 0); |
|
1157 |
INSN3(stlxr, xword, 0b000, 1); |
|
1158 |
INSN4(stxp, xword, 0b001, 0); |
|
1159 |
INSN4(stlxp, xword, 0b001, 1); |
|
1160 |
INSN2(ldxr, xword, 0b010, 0); |
|
1161 |
INSN2(ldaxr, xword, 0b010, 1); |
|
1162 |
INSN_FOO(ldxp, xword, 0b011, 0); |
|
1163 |
INSN_FOO(ldaxp, xword, 0b011, 1); |
|
1164 |
INSN2(stlr, xword, 0b100, 1); |
|
1165 |
INSN2(ldar, xword, 0b110, 1); |
|
1166 |
||
1167 |
#undef INSN2 |
|
1168 |
#undef INSN3 |
|
1169 |
#undef INSN4 |
|
1170 |
#undef INSN_FOO |
|
1171 |
||
1172 |
// Load register (literal) |
|
1173 |
#define INSN(NAME, opc, V) \ |
|
1174 |
void NAME(Register Rt, address dest) { \ |
|
1175 |
long offset = (dest - pc()) >> 2; \ |
|
1176 |
starti; \ |
|
1177 |
f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24), \ |
|
1178 |
sf(offset, 23, 5); \ |
|
1179 |
rf(Rt, 0); \ |
|
1180 |
} \ |
|
1181 |
void NAME(Register Rt, address dest, relocInfo::relocType rtype) { \ |
|
1182 |
InstructionMark im(this); \ |
|
1183 |
guarantee(rtype == relocInfo::internal_word_type, \ |
|
1184 |
"only internal_word_type relocs make sense here"); \ |
|
1185 |
code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); \ |
|
1186 |
NAME(Rt, dest); \ |
|
1187 |
} \ |
|
1188 |
void NAME(Register Rt, Label &L) { \ |
|
1189 |
wrap_label(Rt, L, &Assembler::NAME); \ |
|
1190 |
} |
|
1191 |
||
1192 |
INSN(ldrw, 0b00, 0); |
|
1193 |
INSN(ldr, 0b01, 0); |
|
1194 |
INSN(ldrsw, 0b10, 0); |
|
1195 |
||
1196 |
#undef INSN |
|
1197 |
||
1198 |
#define INSN(NAME, opc, V) \ |
|
1199 |
void NAME(FloatRegister Rt, address dest) { \ |
|
1200 |
long offset = (dest - pc()) >> 2; \ |
|
1201 |
starti; \ |
|
1202 |
f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24), \ |
|
1203 |
sf(offset, 23, 5); \ |
|
1204 |
rf((Register)Rt, 0); \ |
|
1205 |
} |
|
1206 |
||
1207 |
INSN(ldrs, 0b00, 1); |
|
1208 |
INSN(ldrd, 0b01, 1); |
|
32574 | 1209 |
INSN(ldrq, 0b10, 1); |
29183 | 1210 |
|
1211 |
#undef INSN |
|
1212 |
||
1213 |
#define INSN(NAME, opc, V) \ |
|
1214 |
void NAME(address dest, prfop op = PLDL1KEEP) { \ |
|
1215 |
long offset = (dest - pc()) >> 2; \ |
|
1216 |
starti; \ |
|
1217 |
f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24), \ |
|
1218 |
sf(offset, 23, 5); \ |
|
1219 |
f(op, 4, 0); \ |
|
1220 |
} \ |
|
1221 |
void NAME(Label &L, prfop op = PLDL1KEEP) { \ |
|
1222 |
wrap_label(L, op, &Assembler::NAME); \ |
|
1223 |
} |
|
1224 |
||
1225 |
INSN(prfm, 0b11, 0); |
|
1226 |
||
1227 |
#undef INSN |
|
1228 |
||
1229 |
// Load/store |
|
1230 |
void ld_st1(int opc, int p1, int V, int L, |
|
1231 |
Register Rt1, Register Rt2, Address adr, bool no_allocate) { |
|
1232 |
starti; |
|
1233 |
f(opc, 31, 30), f(p1, 29, 27), f(V, 26), f(L, 22); |
|
1234 |
zrf(Rt2, 10), zrf(Rt1, 0); |
|
1235 |
if (no_allocate) { |
|
1236 |
adr.encode_nontemporal_pair(current); |
|
1237 |
} else { |
|
1238 |
adr.encode_pair(current); |
|
1239 |
} |
|
1240 |
} |
|
1241 |
||
1242 |
// Load/store register pair (offset) |
|
1243 |
#define INSN(NAME, size, p1, V, L, no_allocate) \ |
|
1244 |
void NAME(Register Rt1, Register Rt2, Address adr) { \ |
|
1245 |
ld_st1(size, p1, V, L, Rt1, Rt2, adr, no_allocate); \ |
|
1246 |
} |
|
1247 |
||
1248 |
INSN(stpw, 0b00, 0b101, 0, 0, false); |
|
1249 |
INSN(ldpw, 0b00, 0b101, 0, 1, false); |
|
1250 |
INSN(ldpsw, 0b01, 0b101, 0, 1, false); |
|
1251 |
INSN(stp, 0b10, 0b101, 0, 0, false); |
|
1252 |
INSN(ldp, 0b10, 0b101, 0, 1, false); |
|
1253 |
||
1254 |
// Load/store no-allocate pair (offset) |
|
1255 |
INSN(stnpw, 0b00, 0b101, 0, 0, true); |
|
1256 |
INSN(ldnpw, 0b00, 0b101, 0, 1, true); |
|
1257 |
INSN(stnp, 0b10, 0b101, 0, 0, true); |
|
1258 |
INSN(ldnp, 0b10, 0b101, 0, 1, true); |
|
1259 |
||
1260 |
#undef INSN |
|
1261 |
||
1262 |
#define INSN(NAME, size, p1, V, L, no_allocate) \ |
|
1263 |
void NAME(FloatRegister Rt1, FloatRegister Rt2, Address adr) { \ |
|
1264 |
ld_st1(size, p1, V, L, (Register)Rt1, (Register)Rt2, adr, no_allocate); \ |
|
1265 |
} |
|
1266 |
||
1267 |
INSN(stps, 0b00, 0b101, 1, 0, false); |
|
1268 |
INSN(ldps, 0b00, 0b101, 1, 1, false); |
|
1269 |
INSN(stpd, 0b01, 0b101, 1, 0, false); |
|
1270 |
INSN(ldpd, 0b01, 0b101, 1, 1, false); |
|
1271 |
INSN(stpq, 0b10, 0b101, 1, 0, false); |
|
1272 |
INSN(ldpq, 0b10, 0b101, 1, 1, false); |
|
1273 |
||
1274 |
#undef INSN |
|
1275 |
||
1276 |
// Load/store register (all modes) |
|
1277 |
void ld_st2(Register Rt, const Address &adr, int size, int op, int V = 0) { |
|
1278 |
starti; |
|
1279 |
||
1280 |
f(V, 26); // general reg? |
|
1281 |
zrf(Rt, 0); |
|
1282 |
||
1283 |
// Encoding for literal loads is done here (rather than pushed |
|
1284 |
// down into Address::encode) because the encoding of this |
|
1285 |
// instruction is too different from all of the other forms to |
|
1286 |
// make it worth sharing. |
|
1287 |
if (adr.getMode() == Address::literal) { |
|
1288 |
assert(size == 0b10 || size == 0b11, "bad operand size in ldr"); |
|
1289 |
assert(op == 0b01, "literal form can only be used with loads"); |
|
1290 |
f(size & 0b01, 31, 30), f(0b011, 29, 27), f(0b00, 25, 24); |
|
1291 |
long offset = (adr.target() - pc()) >> 2; |
|
1292 |
sf(offset, 23, 5); |
|
1293 |
code_section()->relocate(pc(), adr.rspec()); |
|
1294 |
return; |
|
1295 |
} |
|
1296 |
||
1297 |
f(size, 31, 30); |
|
1298 |
f(op, 23, 22); // str |
|
1299 |
adr.encode(current); |
|
1300 |
} |
|
1301 |
||
1302 |
#define INSN(NAME, size, op) \ |
|
1303 |
void NAME(Register Rt, const Address &adr) { \ |
|
1304 |
ld_st2(Rt, adr, size, op); \ |
|
1305 |
} \ |
|
1306 |
||
1307 |
INSN(str, 0b11, 0b00); |
|
1308 |
INSN(strw, 0b10, 0b00); |
|
1309 |
INSN(strb, 0b00, 0b00); |
|
1310 |
INSN(strh, 0b01, 0b00); |
|
1311 |
||
1312 |
INSN(ldr, 0b11, 0b01); |
|
1313 |
INSN(ldrw, 0b10, 0b01); |
|
1314 |
INSN(ldrb, 0b00, 0b01); |
|
1315 |
INSN(ldrh, 0b01, 0b01); |
|
1316 |
||
1317 |
INSN(ldrsb, 0b00, 0b10); |
|
1318 |
INSN(ldrsbw, 0b00, 0b11); |
|
1319 |
INSN(ldrsh, 0b01, 0b10); |
|
1320 |
INSN(ldrshw, 0b01, 0b11); |
|
1321 |
INSN(ldrsw, 0b10, 0b10); |
|
1322 |
||
1323 |
#undef INSN |
|
1324 |
||
1325 |
#define INSN(NAME, size, op) \ |
|
1326 |
void NAME(const Address &adr, prfop pfop = PLDL1KEEP) { \ |
|
1327 |
ld_st2((Register)pfop, adr, size, op); \ |
|
1328 |
} |
|
1329 |
||
1330 |
INSN(prfm, 0b11, 0b10); // FIXME: PRFM should not be used with |
|
1331 |
// writeback modes, but the assembler |
|
1332 |
// doesn't enfore that. |
|
1333 |
||
1334 |
#undef INSN |
|
1335 |
||
1336 |
#define INSN(NAME, size, op) \ |
|
1337 |
void NAME(FloatRegister Rt, const Address &adr) { \ |
|
1338 |
ld_st2((Register)Rt, adr, size, op, 1); \ |
|
1339 |
} |
|
1340 |
||
1341 |
INSN(strd, 0b11, 0b00); |
|
1342 |
INSN(strs, 0b10, 0b00); |
|
1343 |
INSN(ldrd, 0b11, 0b01); |
|
1344 |
INSN(ldrs, 0b10, 0b01); |
|
1345 |
INSN(strq, 0b00, 0b10); |
|
1346 |
INSN(ldrq, 0x00, 0b11); |
|
1347 |
||
1348 |
#undef INSN |
|
1349 |
||
1350 |
// Shift selector; op_shifted_reg() stores the numeric value in
// bits 23..22.
enum shift_kind {
  LSL = 0,
  LSR = 1,
  ASR = 2,
  ROR = 3
};
|
1351 |
||
1352 |
void op_shifted_reg(unsigned decode, |
|
1353 |
enum shift_kind kind, unsigned shift, |
|
1354 |
unsigned size, unsigned op) { |
|
1355 |
f(size, 31); |
|
1356 |
f(op, 30, 29); |
|
1357 |
f(decode, 28, 24); |
|
1358 |
f(shift, 15, 10); |
|
1359 |
f(kind, 23, 22); |
|
1360 |
} |
|
1361 |
||
1362 |
// Logical (shifted register) |
|
1363 |
#define INSN(NAME, size, op, N) \ |
|
1364 |
void NAME(Register Rd, Register Rn, Register Rm, \ |
|
1365 |
enum shift_kind kind = LSL, unsigned shift = 0) { \ |
|
1366 |
starti; \ |
|
1367 |
f(N, 21); \ |
|
1368 |
zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0); \ |
|
1369 |
op_shifted_reg(0b01010, kind, shift, size, op); \ |
|
1370 |
} |
|
1371 |
||
1372 |
INSN(andr, 1, 0b00, 0); |
|
1373 |
INSN(orr, 1, 0b01, 0); |
|
1374 |
INSN(eor, 1, 0b10, 0); |
|
1375 |
INSN(ands, 1, 0b11, 0); |
|
1376 |
INSN(andw, 0, 0b00, 0); |
|
1377 |
INSN(orrw, 0, 0b01, 0); |
|
1378 |
INSN(eorw, 0, 0b10, 0); |
|
1379 |
INSN(andsw, 0, 0b11, 0); |
|
1380 |
||
1381 |
INSN(bic, 1, 0b00, 1); |
|
1382 |
INSN(orn, 1, 0b01, 1); |
|
1383 |
INSN(eon, 1, 0b10, 1); |
|
1384 |
INSN(bics, 1, 0b11, 1); |
|
1385 |
INSN(bicw, 0, 0b00, 1); |
|
1386 |
INSN(ornw, 0, 0b01, 1); |
|
1387 |
INSN(eonw, 0, 0b10, 1); |
|
1388 |
INSN(bicsw, 0, 0b11, 1); |
|
1389 |
||
1390 |
#undef INSN |
|
1391 |
||
1392 |
// Add/subtract (shifted register) |
|
1393 |
#define INSN(NAME, size, op) \ |
|
1394 |
void NAME(Register Rd, Register Rn, Register Rm, \ |
|
1395 |
enum shift_kind kind, unsigned shift = 0) { \ |
|
1396 |
starti; \ |
|
1397 |
f(0, 21); \ |
|
1398 |
assert_cond(kind != ROR); \ |
|
1399 |
zrf(Rd, 0), zrf(Rn, 5), zrf(Rm, 16); \ |
|
1400 |
op_shifted_reg(0b01011, kind, shift, size, op); \ |
|
1401 |
} |
|
1402 |
||
1403 |
INSN(add, 1, 0b000); |
|
1404 |
INSN(sub, 1, 0b10); |
|
1405 |
INSN(addw, 0, 0b000); |
|
1406 |
INSN(subw, 0, 0b10); |
|
1407 |
||
1408 |
INSN(adds, 1, 0b001); |
|
1409 |
INSN(subs, 1, 0b11); |
|
1410 |
INSN(addsw, 0, 0b001); |
|
1411 |
INSN(subsw, 0, 0b11); |
|
1412 |
||
1413 |
#undef INSN |
|
1414 |
||
1415 |
// Add/subtract (extended register) |
|
1416 |
#define INSN(NAME, op) \ |
|
1417 |
void NAME(Register Rd, Register Rn, Register Rm, \ |
|
1418 |
ext::operation option, int amount = 0) { \ |
|
1419 |
starti; \ |
|
1420 |
zrf(Rm, 16), srf(Rn, 5), srf(Rd, 0); \ |
|
1421 |
add_sub_extended_reg(op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \ |
|
1422 |
} |
|
1423 |
||
1424 |
void add_sub_extended_reg(unsigned op, unsigned decode, |
|
1425 |
Register Rd, Register Rn, Register Rm, |
|
1426 |
unsigned opt, ext::operation option, unsigned imm) { |
|
1427 |
guarantee(imm <= 4, "shift amount must be < 4"); |
|
1428 |
f(op, 31, 29), f(decode, 28, 24), f(opt, 23, 22), f(1, 21); |
|
1429 |
f(option, 15, 13), f(imm, 12, 10); |
|
1430 |
} |
|
1431 |
||
1432 |
INSN(addw, 0b000); |
|
1433 |
INSN(subw, 0b010); |
|
1434 |
INSN(add, 0b100); |
|
1435 |
INSN(sub, 0b110); |
|
1436 |
||
1437 |
#undef INSN |
|
1438 |
||
1439 |
#define INSN(NAME, op) \ |
|
1440 |
void NAME(Register Rd, Register Rn, Register Rm, \ |
|
1441 |
ext::operation option, int amount = 0) { \ |
|
1442 |
starti; \ |
|
1443 |
zrf(Rm, 16), srf(Rn, 5), zrf(Rd, 0); \ |
|
1444 |
add_sub_extended_reg(op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \ |
|
1445 |
} |
|
1446 |
||
1447 |
INSN(addsw, 0b001); |
|
1448 |
INSN(subsw, 0b011); |
|
1449 |
INSN(adds, 0b101); |
|
1450 |
INSN(subs, 0b111); |
|
1451 |
||
1452 |
#undef INSN |
|
1453 |
||
1454 |
// Aliases for short forms of add and sub |
|
1455 |
#define INSN(NAME) \ |
|
1456 |
void NAME(Register Rd, Register Rn, Register Rm) { \ |
|
1457 |
if (Rd == sp || Rn == sp) \ |
|
1458 |
NAME(Rd, Rn, Rm, ext::uxtx); \ |
|
1459 |
else \ |
|
1460 |
NAME(Rd, Rn, Rm, LSL); \ |
|
1461 |
} |
|
1462 |
||
1463 |
INSN(addw); |
|
1464 |
INSN(subw); |
|
1465 |
INSN(add); |
|
1466 |
INSN(sub); |
|
1467 |
||
1468 |
INSN(addsw); |
|
1469 |
INSN(subsw); |
|
1470 |
INSN(adds); |
|
1471 |
INSN(subs); |
|
1472 |
||
1473 |
#undef INSN |
|
1474 |
||
1475 |
// Add/subtract (with carry) |
|
1476 |
void add_sub_carry(unsigned op, Register Rd, Register Rn, Register Rm) { |
|
1477 |
starti; |
|
1478 |
f(op, 31, 29); |
|
1479 |
f(0b11010000, 28, 21); |
|
1480 |
f(0b000000, 15, 10); |
|
30225
e9722ea461d4
8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents:
29183
diff
changeset
|
1481 |
zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0); |
29183 | 1482 |
} |
1483 |
||
1484 |
#define INSN(NAME, op) \ |
|
1485 |
void NAME(Register Rd, Register Rn, Register Rm) { \ |
|
1486 |
add_sub_carry(op, Rd, Rn, Rm); \ |
|
1487 |
} |
|
1488 |
||
1489 |
INSN(adcw, 0b000); |
|
1490 |
INSN(adcsw, 0b001); |
|
1491 |
INSN(sbcw, 0b010); |
|
1492 |
INSN(sbcsw, 0b011); |
|
1493 |
INSN(adc, 0b100); |
|
1494 |
INSN(adcs, 0b101); |
|
1495 |
INSN(sbc,0b110); |
|
1496 |
INSN(sbcs, 0b111); |
|
1497 |
||
1498 |
#undef INSN |
|
1499 |
||
1500 |
// Conditional compare (both kinds) |
|
1501 |
void conditional_compare(unsigned op, int o2, int o3, |
|
1502 |
Register Rn, unsigned imm5, unsigned nzcv, |
|
1503 |
unsigned cond) { |
|
1504 |
f(op, 31, 29); |
|
1505 |
f(0b11010010, 28, 21); |
|
1506 |
f(cond, 15, 12); |
|
1507 |
f(o2, 10); |
|
1508 |
f(o3, 4); |
|
1509 |
f(nzcv, 3, 0); |
|
1510 |
f(imm5, 20, 16), rf(Rn, 5); |
|
1511 |
} |
|
1512 |
||
1513 |
#define INSN(NAME, op) \ |
|
1514 |
void NAME(Register Rn, Register Rm, int imm, Condition cond) { \ |
|
1515 |
starti; \ |
|
1516 |
f(0, 11); \ |
|
1517 |
conditional_compare(op, 0, 0, Rn, (uintptr_t)Rm, imm, cond); \ |
|
1518 |
} \ |
|
1519 |
\ |
|
1520 |
void NAME(Register Rn, int imm5, int imm, Condition cond) { \ |
|
1521 |
starti; \ |
|
1522 |
f(1, 11); \ |
|
1523 |
conditional_compare(op, 0, 0, Rn, imm5, imm, cond); \ |
|
1524 |
} |
|
1525 |
||
1526 |
INSN(ccmnw, 0b001); |
|
1527 |
INSN(ccmpw, 0b011); |
|
1528 |
INSN(ccmn, 0b101); |
|
1529 |
INSN(ccmp, 0b111); |
|
1530 |
||
1531 |
#undef INSN |
|
1532 |
||
1533 |
// Conditional select |
|
1534 |
void conditional_select(unsigned op, unsigned op2, |
|
1535 |
Register Rd, Register Rn, Register Rm, |
|
1536 |
unsigned cond) { |
|
1537 |
starti; |
|
1538 |
f(op, 31, 29); |
|
1539 |
f(0b11010100, 28, 21); |
|
1540 |
f(cond, 15, 12); |
|
1541 |
f(op2, 11, 10); |
|
1542 |
zrf(Rm, 16), zrf(Rn, 5), rf(Rd, 0); |
|
1543 |
} |
|
1544 |
||
1545 |
#define INSN(NAME, op, op2) \ |
|
1546 |
void NAME(Register Rd, Register Rn, Register Rm, Condition cond) { \ |
|
1547 |
conditional_select(op, op2, Rd, Rn, Rm, cond); \ |
|
1548 |
} |
|
1549 |
||
1550 |
INSN(cselw, 0b000, 0b00); |
|
1551 |
INSN(csincw, 0b000, 0b01); |
|
1552 |
INSN(csinvw, 0b010, 0b00); |
|
1553 |
INSN(csnegw, 0b010, 0b01); |
|
1554 |
INSN(csel, 0b100, 0b00); |
|
1555 |
INSN(csinc, 0b100, 0b01); |
|
1556 |
INSN(csinv, 0b110, 0b00); |
|
1557 |
INSN(csneg, 0b110, 0b01); |
|
1558 |
||
1559 |
#undef INSN |
|
1560 |
||
1561 |
// Data processing |
|
1562 |
void data_processing(unsigned op29, unsigned opcode, |
|
1563 |
Register Rd, Register Rn) { |
|
1564 |
f(op29, 31, 29), f(0b11010110, 28, 21); |
|
1565 |
f(opcode, 15, 10); |
|
1566 |
rf(Rn, 5), rf(Rd, 0); |
|
1567 |
} |
|
1568 |
||
1569 |
// (1 source) |
|
1570 |
#define INSN(NAME, op29, opcode2, opcode) \ |
|
1571 |
void NAME(Register Rd, Register Rn) { \ |
|
1572 |
starti; \ |
|
1573 |
f(opcode2, 20, 16); \ |
|
1574 |
data_processing(op29, opcode, Rd, Rn); \ |
|
1575 |
} |
|
1576 |
||
1577 |
INSN(rbitw, 0b010, 0b00000, 0b00000); |
|
1578 |
INSN(rev16w, 0b010, 0b00000, 0b00001); |
|
1579 |
INSN(revw, 0b010, 0b00000, 0b00010); |
|
1580 |
INSN(clzw, 0b010, 0b00000, 0b00100); |
|
1581 |
INSN(clsw, 0b010, 0b00000, 0b00101); |
|
1582 |
||
1583 |
INSN(rbit, 0b110, 0b00000, 0b00000); |
|
1584 |
INSN(rev16, 0b110, 0b00000, 0b00001); |
|
1585 |
INSN(rev32, 0b110, 0b00000, 0b00010); |
|
1586 |
INSN(rev, 0b110, 0b00000, 0b00011); |
|
1587 |
INSN(clz, 0b110, 0b00000, 0b00100); |
|
1588 |
INSN(cls, 0b110, 0b00000, 0b00101); |
|
1589 |
||
1590 |
#undef INSN |
|
1591 |
||
1592 |
// (2 sources) |
|
1593 |
#define INSN(NAME, op29, opcode) \ |
|
1594 |
void NAME(Register Rd, Register Rn, Register Rm) { \ |
|
1595 |
starti; \ |
|
1596 |
rf(Rm, 16); \ |
|
1597 |
data_processing(op29, opcode, Rd, Rn); \ |
|
1598 |
} |
|
1599 |
||
1600 |
INSN(udivw, 0b000, 0b000010); |
|
1601 |
INSN(sdivw, 0b000, 0b000011); |
|
1602 |
INSN(lslvw, 0b000, 0b001000); |
|
1603 |
INSN(lsrvw, 0b000, 0b001001); |
|
1604 |
INSN(asrvw, 0b000, 0b001010); |
|
1605 |
INSN(rorvw, 0b000, 0b001011); |
|
1606 |
||
1607 |
INSN(udiv, 0b100, 0b000010); |
|
1608 |
INSN(sdiv, 0b100, 0b000011); |
|
1609 |
INSN(lslv, 0b100, 0b001000); |
|
1610 |
INSN(lsrv, 0b100, 0b001001); |
|
1611 |
INSN(asrv, 0b100, 0b001010); |
|
1612 |
INSN(rorv, 0b100, 0b001011); |
|
1613 |
||
1614 |
#undef INSN |
|
1615 |
||
1616 |
// (3 sources) |
|
1617 |
void data_processing(unsigned op54, unsigned op31, unsigned o0, |
|
1618 |
Register Rd, Register Rn, Register Rm, |
|
1619 |
Register Ra) { |
|
1620 |
starti; |
|
1621 |
f(op54, 31, 29), f(0b11011, 28, 24); |
|
1622 |
f(op31, 23, 21), f(o0, 15); |
|
1623 |
zrf(Rm, 16), zrf(Ra, 10), zrf(Rn, 5), zrf(Rd, 0); |
|
1624 |
} |
|
1625 |
||
1626 |
#define INSN(NAME, op54, op31, o0) \ |
|
1627 |
void NAME(Register Rd, Register Rn, Register Rm, Register Ra) { \ |
|
1628 |
data_processing(op54, op31, o0, Rd, Rn, Rm, Ra); \ |
|
1629 |
} |
|
1630 |
||
1631 |
INSN(maddw, 0b000, 0b000, 0); |
|
1632 |
INSN(msubw, 0b000, 0b000, 1); |
|
1633 |
INSN(madd, 0b100, 0b000, 0); |
|
1634 |
INSN(msub, 0b100, 0b000, 1); |
|
1635 |
INSN(smaddl, 0b100, 0b001, 0); |
|
1636 |
INSN(smsubl, 0b100, 0b001, 1); |
|
1637 |
INSN(umaddl, 0b100, 0b101, 0); |
|
1638 |
INSN(umsubl, 0b100, 0b101, 1); |
|
1639 |
||
1640 |
#undef INSN |
|
1641 |
||
1642 |
#define INSN(NAME, op54, op31, o0) \ |
|
1643 |
void NAME(Register Rd, Register Rn, Register Rm) { \ |
|
1644 |
data_processing(op54, op31, o0, Rd, Rn, Rm, (Register)31); \ |
|
1645 |
} |
|
1646 |
||
1647 |
INSN(smulh, 0b100, 0b010, 0); |
|
1648 |
INSN(umulh, 0b100, 0b110, 0); |
|
1649 |
||
1650 |
#undef INSN |
|
1651 |
||
1652 |
// Floating-point data-processing (1 source) |
|
1653 |
void data_processing(unsigned op31, unsigned type, unsigned opcode, |
|
1654 |
FloatRegister Vd, FloatRegister Vn) { |
|
1655 |
starti; |
|
1656 |
f(op31, 31, 29); |
|
1657 |
f(0b11110, 28, 24); |
|
1658 |
f(type, 23, 22), f(1, 21), f(opcode, 20, 15), f(0b10000, 14, 10); |
|
1659 |
rf(Vn, 5), rf(Vd, 0); |
|
1660 |
} |
|
1661 |
||
1662 |
#define INSN(NAME, op31, type, opcode) \ |
|
1663 |
void NAME(FloatRegister Vd, FloatRegister Vn) { \ |
|
1664 |
data_processing(op31, type, opcode, Vd, Vn); \ |
|
1665 |
} |
|
1666 |
||
1667 |
private: |
|
1668 |
INSN(i_fmovs, 0b000, 0b00, 0b000000); |
|
1669 |
public: |
|
1670 |
INSN(fabss, 0b000, 0b00, 0b000001); |
|
1671 |
INSN(fnegs, 0b000, 0b00, 0b000010); |
|
1672 |
INSN(fsqrts, 0b000, 0b00, 0b000011); |
|
1673 |
INSN(fcvts, 0b000, 0b00, 0b000101); // Single-precision to double-precision |
|
1674 |
||
1675 |
private: |
|
1676 |
INSN(i_fmovd, 0b000, 0b01, 0b000000); |
|
1677 |
public: |
|
1678 |
INSN(fabsd, 0b000, 0b01, 0b000001); |
|
1679 |
INSN(fnegd, 0b000, 0b01, 0b000010); |
|
1680 |
INSN(fsqrtd, 0b000, 0b01, 0b000011); |
|
1681 |
INSN(fcvtd, 0b000, 0b01, 0b000100); // Double-precision to single-precision |
|
1682 |
||
1683 |
void fmovd(FloatRegister Vd, FloatRegister Vn) { |
|
1684 |
assert(Vd != Vn, "should be"); |
|
1685 |
i_fmovd(Vd, Vn); |
|
1686 |
} |
|
1687 |
||
1688 |
void fmovs(FloatRegister Vd, FloatRegister Vn) { |
|
1689 |
assert(Vd != Vn, "should be"); |
|
1690 |
i_fmovs(Vd, Vn); |
|
1691 |
} |
|
1692 |
||
1693 |
#undef INSN |
|
1694 |
||
1695 |
// Floating-point data-processing (2 source) |
|
1696 |
void data_processing(unsigned op31, unsigned type, unsigned opcode, |
|
1697 |
FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { |
|
1698 |
starti; |
|
1699 |
f(op31, 31, 29); |
|
1700 |
f(0b11110, 28, 24); |
|
1701 |
f(type, 23, 22), f(1, 21), f(opcode, 15, 12), f(0b10, 11, 10); |
|
1702 |
rf(Vm, 16), rf(Vn, 5), rf(Vd, 0); |
|
1703 |
} |
|
1704 |
||
1705 |
#define INSN(NAME, op31, type, opcode) \ |
|
1706 |
void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { \ |
|
1707 |
data_processing(op31, type, opcode, Vd, Vn, Vm); \ |
|
1708 |
} |
|
1709 |
||
1710 |
INSN(fmuls, 0b000, 0b00, 0b0000); |
|
1711 |
INSN(fdivs, 0b000, 0b00, 0b0001); |
|
1712 |
INSN(fadds, 0b000, 0b00, 0b0010); |
|
1713 |
INSN(fsubs, 0b000, 0b00, 0b0011); |
|
1714 |
INSN(fnmuls, 0b000, 0b00, 0b1000); |
|
1715 |
||
1716 |
INSN(fmuld, 0b000, 0b01, 0b0000); |
|
1717 |
INSN(fdivd, 0b000, 0b01, 0b0001); |
|
1718 |
INSN(faddd, 0b000, 0b01, 0b0010); |
|
1719 |
INSN(fsubd, 0b000, 0b01, 0b0011); |
|
1720 |
INSN(fnmuld, 0b000, 0b01, 0b1000); |
|
1721 |
||
1722 |
#undef INSN |
|
1723 |
||
1724 |
// Floating-point data-processing (3 source) |
|
1725 |
void data_processing(unsigned op31, unsigned type, unsigned o1, unsigned o0, |
|
1726 |
FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, |
|
1727 |
FloatRegister Va) { |
|
1728 |
starti; |
|
1729 |
f(op31, 31, 29); |
|
1730 |
f(0b11111, 28, 24); |
|
1731 |
f(type, 23, 22), f(o1, 21), f(o0, 15); |
|
1732 |
rf(Vm, 16), rf(Va, 10), rf(Vn, 5), rf(Vd, 0); |
|
1733 |
} |
|
1734 |
||
1735 |
#define INSN(NAME, op31, type, o1, o0) \ |
|
1736 |
void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, \ |
|
1737 |
FloatRegister Va) { \ |
|
1738 |
data_processing(op31, type, o1, o0, Vd, Vn, Vm, Va); \ |
|
1739 |
} |
|
1740 |
||
1741 |
INSN(fmadds, 0b000, 0b00, 0, 0); |
|
1742 |
INSN(fmsubs, 0b000, 0b00, 0, 1); |
|
1743 |
INSN(fnmadds, 0b000, 0b00, 1, 0); |
|
1744 |
INSN(fnmsubs, 0b000, 0b00, 1, 1); |
|
1745 |
||
1746 |
INSN(fmaddd, 0b000, 0b01, 0, 0); |
|
1747 |
INSN(fmsubd, 0b000, 0b01, 0, 1); |
|
1748 |
INSN(fnmaddd, 0b000, 0b01, 1, 0); |
|
1749 |
INSN(fnmsub, 0b000, 0b01, 1, 1); |
|
1750 |
||
1751 |
#undef INSN |
|
1752 |
||
1753 |
// Floating-point conditional select |
|
1754 |
void fp_conditional_select(unsigned op31, unsigned type, |
|
1755 |
unsigned op1, unsigned op2, |
|
1756 |
Condition cond, FloatRegister Vd, |
|
1757 |
FloatRegister Vn, FloatRegister Vm) { |
|
1758 |
starti; |
|
1759 |
f(op31, 31, 29); |
|
1760 |
f(0b11110, 28, 24); |
|
1761 |
f(type, 23, 22); |
|
1762 |
f(op1, 21, 21); |
|
1763 |
f(op2, 11, 10); |
|
1764 |
f(cond, 15, 12); |
|
1765 |
rf(Vm, 16), rf(Vn, 5), rf(Vd, 0); |
|
1766 |
} |
|
1767 |
||
1768 |
#define INSN(NAME, op31, type, op1, op2) \ |
|
1769 |
void NAME(FloatRegister Vd, FloatRegister Vn, \ |
|
1770 |
FloatRegister Vm, Condition cond) { \ |
|
1771 |
fp_conditional_select(op31, type, op1, op2, cond, Vd, Vn, Vm); \ |
|
1772 |
} |
|
1773 |
||
1774 |
INSN(fcsels, 0b000, 0b00, 0b1, 0b11); |
|
1775 |
INSN(fcseld, 0b000, 0b01, 0b1, 0b11); |
|
1776 |
||
1777 |
#undef INSN |
|
1778 |
||
1779 |
// Floating-point<->integer conversions |
|
1780 |
void float_int_convert(unsigned op31, unsigned type, |
|
1781 |
unsigned rmode, unsigned opcode, |
|
1782 |
Register Rd, Register Rn) { |
|
1783 |
starti; |
|
1784 |
f(op31, 31, 29); |
|
1785 |
f(0b11110, 28, 24); |
|
1786 |
f(type, 23, 22), f(1, 21), f(rmode, 20, 19); |
|
1787 |
f(opcode, 18, 16), f(0b000000, 15, 10); |
|
1788 |
zrf(Rn, 5), zrf(Rd, 0); |
|
1789 |
} |
|
1790 |
||
1791 |
#define INSN(NAME, op31, type, rmode, opcode) \ |
|
1792 |
void NAME(Register Rd, FloatRegister Vn) { \ |
|
1793 |
float_int_convert(op31, type, rmode, opcode, Rd, (Register)Vn); \ |
|
1794 |
} |
|
1795 |
||
1796 |
INSN(fcvtzsw, 0b000, 0b00, 0b11, 0b000); |
|
1797 |
INSN(fcvtzs, 0b100, 0b00, 0b11, 0b000); |
|
1798 |
INSN(fcvtzdw, 0b000, 0b01, 0b11, 0b000); |
|
1799 |
INSN(fcvtzd, 0b100, 0b01, 0b11, 0b000); |
|
1800 |
||
1801 |
INSN(fmovs, 0b000, 0b00, 0b00, 0b110); |
|
1802 |
INSN(fmovd, 0b100, 0b01, 0b00, 0b110); |
|
1803 |
||
1804 |
// INSN(fmovhid, 0b100, 0b10, 0b01, 0b110); |
|
1805 |
||
1806 |
#undef INSN |
|
1807 |
||
1808 |
#define INSN(NAME, op31, type, rmode, opcode) \ |
|
1809 |
void NAME(FloatRegister Vd, Register Rn) { \ |
|
1810 |
float_int_convert(op31, type, rmode, opcode, (Register)Vd, Rn); \ |
|
1811 |
} |
|
1812 |
||
1813 |
INSN(fmovs, 0b000, 0b00, 0b00, 0b111); |
|
1814 |
INSN(fmovd, 0b100, 0b01, 0b00, 0b111); |
|
1815 |
||
1816 |
INSN(scvtfws, 0b000, 0b00, 0b00, 0b010); |
|
1817 |
INSN(scvtfs, 0b100, 0b00, 0b00, 0b010); |
|
1818 |
INSN(scvtfwd, 0b000, 0b01, 0b00, 0b010); |
|
1819 |
INSN(scvtfd, 0b100, 0b01, 0b00, 0b010); |
|
1820 |
||
1821 |
// INSN(fmovhid, 0b100, 0b10, 0b01, 0b111); |
|
1822 |
||
1823 |
#undef INSN |
|
1824 |
||
1825 |
// Floating-point compare |
|
1826 |
void float_compare(unsigned op31, unsigned type, |
|
1827 |
unsigned op, unsigned op2, |
|
1828 |
FloatRegister Vn, FloatRegister Vm = (FloatRegister)0) { |
|
1829 |
starti; |
|
1830 |
f(op31, 31, 29); |
|
1831 |
f(0b11110, 28, 24); |
|
1832 |
f(type, 23, 22), f(1, 21); |
|
1833 |
f(op, 15, 14), f(0b1000, 13, 10), f(op2, 4, 0); |
|
1834 |
rf(Vn, 5), rf(Vm, 16); |
|
1835 |
} |
|
1836 |
||
1837 |
||
1838 |
#define INSN(NAME, op31, type, op, op2) \ |
|
1839 |
void NAME(FloatRegister Vn, FloatRegister Vm) { \ |
|
1840 |
float_compare(op31, type, op, op2, Vn, Vm); \ |
|
1841 |
} |
|
1842 |
||
1843 |
#define INSN1(NAME, op31, type, op, op2) \ |
|
1844 |
void NAME(FloatRegister Vn, double d) { \ |
|
1845 |
assert_cond(d == 0.0); \ |
|
1846 |
float_compare(op31, type, op, op2, Vn); \ |
|
1847 |
} |
|
1848 |
||
1849 |
INSN(fcmps, 0b000, 0b00, 0b00, 0b00000); |
|
1850 |
INSN1(fcmps, 0b000, 0b00, 0b00, 0b01000); |
|
1851 |
// INSN(fcmpes, 0b000, 0b00, 0b00, 0b10000); |
|
1852 |
// INSN1(fcmpes, 0b000, 0b00, 0b00, 0b11000); |
|
1853 |
||
1854 |
INSN(fcmpd, 0b000, 0b01, 0b00, 0b00000); |
|
1855 |
INSN1(fcmpd, 0b000, 0b01, 0b00, 0b01000); |
|
1856 |
// INSN(fcmped, 0b000, 0b01, 0b00, 0b10000); |
|
1857 |
// INSN1(fcmped, 0b000, 0b01, 0b00, 0b11000); |
|
1858 |
||
1859 |
#undef INSN |
|
1860 |
#undef INSN1 |
|
1861 |
||
1862 |
// Floating-point Move (immediate) |
|
1863 |
private: |
|
1864 |
unsigned pack(double value); |
|
1865 |
||
1866 |
// Emits a floating-point move of an immediate into Vn.
//   size        -> bits 23:22
//   pack(value) -> bits 20:13 (eight-bit form produced by pack())
// Bits 31:24 hold 0b00011110, bit 21 is set and bits 12:5 hold 0b10000000.
void fmov_imm(FloatRegister Vn, double value, unsigned size) {
  starti;
  f(0b00011110, 31, 24);
  f(size, 23, 22);
  f(1, 21);
  const unsigned packed = pack(value);
  f(packed, 20, 13);
  f(0b10000000, 12, 5);
  rf(Vn, 0);
}
|
1872 |
||
1873 |
public: |
|
1874 |
||
1875 |
// Loads a constant into Vn using size selector 0b00.
// A value that compares equal to zero is materialised by moving zr into Vn;
// every other value goes through fmov_imm(). Note that -0.0 also compares
// equal to zero and therefore takes the zr path.
void fmovs(FloatRegister Vn, double value) {
  if (value == 0.0) {
    fmovs(Vn, zr);
    return;
  }
  fmov_imm(Vn, value, 0b00);
}
|
1881 |
// Loads a constant into Vn using size selector 0b01.
// A value that compares equal to zero is materialised by moving zr into Vn;
// every other value goes through fmov_imm(). Note that -0.0 also compares
// equal to zero and therefore takes the zr path.
void fmovd(FloatRegister Vn, double value) {
  if (value == 0.0) {
    fmovd(Vn, zr);
    return;
  }
  fmov_imm(Vn, value, 0b01);
}
|
1887 |
||
1888 |
/* SIMD extensions |
|
1889 |
* |
|
1890 |
* We just use FloatRegister in the following. They are exactly the same |
|
1891 |
* as SIMD registers. |
|
1892 |
*/ |
|
1893 |
public: |
|
1894 |
||
1895 |
// Vector arrangement specifiers.
// The numeric order matters: encoders in this class place bit 0 of the value
// in instruction bit 30 and use the value shifted right by one as a size
// field, so each row below shares one size value.
enum SIMD_Arrangement {
  T8B, T16B,
  T4H, T8H,
  T2S, T4S,
  T1D, T2D,
  T1Q
};
1898 |
||
1899 |
// Scalar register variants.
// The numeric order matters: users in this class take the value as a shift
// count or mask it with 3, and test for Q explicitly.
enum SIMD_RegVariant {
  B,
  H,
  S,
  D,
  Q
};
1902 |
||
30890
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
1903 |
#define INSN(NAME, op) \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
1904 |
void NAME(FloatRegister Rt, SIMD_RegVariant T, const Address &adr) { \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
1905 |
ld_st2((Register)Rt, adr, (int)T & 3, op + ((T==Q) ? 0b10:0b00), 1); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
1906 |
} \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
1907 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
1908 |
INSN(ldr, 1); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
1909 |
INSN(str, 0); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
1910 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
1911 |
#undef INSN |
29183 | 1912 |
|
1913 |
private: |
|
1914 |
||
1915 |
// Structure load/store with a plain base register.
//   bit 30     <- low bit of T
//   bits 29:21 <- op1          bits 20:16 <- 0
//   bits 15:12 <- op2          bits 11:10 <- T >> 1
// Xn is placed at bit 5 and Vt at bit 0.
void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) {
  starti;
  const int q    = (int)T & 1;
  const int size = (int)T >> 1;

  f(0, 31);
  f(q, 30);
  f(op1, 29, 21);
  f(0, 20, 16);
  f(op2, 15, 12);
  f(size, 11, 10);
  rf(Xn, 5);
  rf(Vt, 0);
}
|
1921 |
// Structure load/store, variant selected when the address carries an
// integer offset.
//   bit 30     <- low bit of T
//   bits 29:21 <- op1 | 0b100  bits 20:16 <- 0b11111
//   bits 15:12 <- op2          bits 11:10 <- T >> 1
// Xn is placed at bit 5 and Vt at bit 0.
//
// NOTE(review): imm is accepted but never encoded or checked here; bits
// 20:16 are always 0b11111. Presumably the increment is implied by the
// transfer size and callers are trusted to pass a matching offset - confirm.
void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
           int imm, int op1, int op2) {
  starti;
  const int q    = (int)T & 1;
  const int size = (int)T >> 1;

  f(0, 31);
  f(q, 30);
  f(op1 | 0b100, 29, 21);
  f(0b11111, 20, 16);
  f(op2, 15, 12);
  f(size, 11, 10);
  rf(Xn, 5);
  rf(Vt, 0);
}
|
1928 |
// Structure load/store, variant selected when the address carries an index
// register.
//   bit 30     <- low bit of T
//   bits 29:21 <- op1 | 0b100  Xm at bit 16
//   bits 15:12 <- op2          bits 11:10 <- T >> 1
// Xn is placed at bit 5 and Vt at bit 0.
void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
           Register Xm, int op1, int op2) {
  starti;
  const int q    = (int)T & 1;
  const int size = (int)T >> 1;

  f(0, 31);
  f(q, 30);
  f(op1 | 0b100, 29, 21);
  rf(Xm, 16);
  f(op2, 15, 12);
  f(size, 11, 10);
  rf(Xn, 5);
  rf(Vt, 0);
}
|
1935 |
||
1936 |
// Picks the ld_st() overload that matches the addressing mode of `a`:
//   base_plus_offset     -> base-only overload; the offset must be 0
//   post                 -> overload taking the integer offset
//   base_plus_offset_reg -> overload taking the index register
// Any other mode ends in ShouldNotReachHere().
void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2) {
  switch (a.getMode()) {
  case Address::base_plus_offset: {
    guarantee(a.offset() == 0, "no offset allowed here");
    ld_st(Vt, T, a.base(), op1, op2);
    break;
  }
  case Address::post: {
    ld_st(Vt, T, a.base(), a.offset(), op1, op2);
    break;
  }
  case Address::base_plus_offset_reg: {
    ld_st(Vt, T, a.base(), a.index(), op1, op2);
    break;
  }
  default: {
    ShouldNotReachHere();
  }
  }
}
|
1952 |
||
1953 |
public: |
|
1954 |
||
1955 |
#define INSN1(NAME, op1, op2) \ |
|
1956 |
void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) { \ |
|
1957 |
ld_st(Vt, T, a, op1, op2); \ |
|
1958 |
} |
|
1959 |
||
1960 |
#define INSN2(NAME, op1, op2) \ |
|
1961 |
void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \ |
|
1962 |
assert(Vt->successor() == Vt2, "Registers must be ordered"); \ |
|
1963 |
ld_st(Vt, T, a, op1, op2); \ |
|
1964 |
} |
|
1965 |
||
1966 |
#define INSN3(NAME, op1, op2) \ |
|
1967 |
void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ |
|
1968 |
SIMD_Arrangement T, const Address &a) { \ |
|
1969 |
assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \ |
|
1970 |
"Registers must be ordered"); \ |
|
1971 |
ld_st(Vt, T, a, op1, op2); \ |
|
1972 |
} |
|
1973 |
||
1974 |
#define INSN4(NAME, op1, op2) \ |
|
1975 |
void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ |
|
1976 |
FloatRegister Vt4, SIMD_Arrangement T, const Address &a) { \ |
|
1977 |
assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \ |
|
1978 |
Vt3->successor() == Vt4, "Registers must be ordered"); \ |
|
1979 |
ld_st(Vt, T, a, op1, op2); \ |
|
1980 |
} |
|
1981 |
||
1982 |
INSN1(ld1, 0b001100010, 0b0111); |
|
1983 |
INSN2(ld1, 0b001100010, 0b1010); |
|
1984 |
INSN3(ld1, 0b001100010, 0b0110); |
|
1985 |
INSN4(ld1, 0b001100010, 0b0010); |
|
1986 |
||
1987 |
INSN2(ld2, 0b001100010, 0b1000); |
|
1988 |
INSN3(ld3, 0b001100010, 0b0100); |
|
1989 |
INSN4(ld4, 0b001100010, 0b0000); |
|
1990 |
||
1991 |
INSN1(st1, 0b001100000, 0b0111); |
|
1992 |
INSN2(st1, 0b001100000, 0b1010); |
|
1993 |
INSN3(st1, 0b001100000, 0b0110); |
|
1994 |
INSN4(st1, 0b001100000, 0b0010); |
|
1995 |
||
1996 |
INSN2(st2, 0b001100000, 0b1000); |
|
1997 |
INSN3(st3, 0b001100000, 0b0100); |
|
1998 |
INSN4(st4, 0b001100000, 0b0000); |
|
1999 |
||
2000 |
INSN1(ld1r, 0b001101010, 0b1100); |
|
2001 |
INSN2(ld2r, 0b001101011, 0b1100); |
|
2002 |
INSN3(ld3r, 0b001101010, 0b1110); |
|
2003 |
INSN4(ld4r, 0b001101011, 0b1110); |
|
2004 |
||
2005 |
#undef INSN1 |
|
2006 |
#undef INSN2 |
|
2007 |
#undef INSN3 |
|
2008 |
#undef INSN4 |
|
2009 |
||
2010 |
#define INSN(NAME, opc) \ |
|
2011 |
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ |
|
2012 |
starti; \ |
|
2013 |
assert(T == T8B || T == T16B, "must be T8B or T16B"); \ |
|
2014 |
f(0, 31), f((int)T & 1, 30), f(opc, 29, 21); \ |
|
2015 |
rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0); \ |
|
2016 |
} |
|
2017 |
||
30890
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2018 |
INSN(eor, 0b101110001); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2019 |
INSN(orr, 0b001110101); |
29183 | 2020 |
INSN(andr, 0b001110001); |
30890
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2021 |
INSN(bic, 0b001110011); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2022 |
INSN(bif, 0b101110111); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2023 |
INSN(bit, 0b101110101); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2024 |
INSN(bsl, 0b101110011); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2025 |
INSN(orn, 0b001110111); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2026 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2027 |
#undef INSN |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2028 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2029 |
#define INSN(NAME, opc, opc2) \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2030 |
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2031 |
starti; \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2032 |
f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2033 |
f((int)T >> 1, 23, 22), f(1, 21), rf(Vm, 16), f(opc2, 15, 10); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2034 |
rf(Vn, 5), rf(Vd, 0); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2035 |
} |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2036 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2037 |
INSN(addv, 0, 0b100001); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2038 |
INSN(subv, 1, 0b100001); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2039 |
INSN(mulv, 0, 0b100111); |
35096
ac7a4a87c3c2
8144587: aarch64: generate vectorized MLA/MLS instructions
fyang
parents:
33088
diff
changeset
|
2040 |
INSN(mlav, 0, 0b100101); |
ac7a4a87c3c2
8144587: aarch64: generate vectorized MLA/MLS instructions
fyang
parents:
33088
diff
changeset
|
2041 |
INSN(mlsv, 1, 0b100101); |
30890
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2042 |
INSN(sshl, 0, 0b010001); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2043 |
INSN(ushl, 1, 0b010001); |
29183 | 2044 |
|
2045 |
#undef INSN |
|
2046 |
||
30890
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2047 |
#define INSN(NAME, opc, opc2) \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2048 |
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ |
29183 | 2049 |
starti; \ |
2050 |
f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24); \ |
|
30890
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2051 |
f((int)T >> 1, 23, 22), f(opc2, 21, 10); \ |
29183 | 2052 |
rf(Vn, 5), rf(Vd, 0); \ |
2053 |
} |
|
2054 |
||
30890
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2055 |
INSN(absr, 0, 0b100000101110); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2056 |
INSN(negr, 1, 0b100000101110); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2057 |
INSN(notr, 1, 0b100000010110); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2058 |
INSN(addv, 0, 0b110001101110); |
31517 | 2059 |
INSN(cls, 0, 0b100000010010); |
2060 |
INSN(clz, 1, 0b100000010010); |
|
2061 |
INSN(cnt, 0, 0b100000010110); |
|
30890
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2062 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2063 |
#undef INSN |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2064 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2065 |
#define INSN(NAME, op0, cmode0) \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2066 |
void NAME(FloatRegister Vd, SIMD_Arrangement T, unsigned imm8, unsigned lsl = 0) { \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2067 |
unsigned cmode = cmode0; \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2068 |
unsigned op = op0; \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2069 |
starti; \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2070 |
assert(lsl == 0 || \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2071 |
((T == T4H || T == T8H) && lsl == 8) || \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2072 |
((T == T2S || T == T4S) && ((lsl >> 3) < 4)), "invalid shift"); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2073 |
cmode |= lsl >> 2; \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2074 |
if (T == T4H || T == T8H) cmode |= 0b1000; \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2075 |
if (!(T == T4H || T == T8H || T == T2S || T == T4S)) { \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2076 |
assert(op == 0 && cmode0 == 0, "must be MOVI"); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2077 |
cmode = 0b1110; \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2078 |
if (T == T1D || T == T2D) op = 1; \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2079 |
} \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2080 |
f(0, 31), f((int)T & 1, 30), f(op, 29), f(0b0111100000, 28, 19); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2081 |
f(imm8 >> 5, 18, 16), f(cmode, 15, 12), f(0x01, 11, 10), f(imm8 & 0b11111, 9, 5); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2082 |
rf(Vd, 0); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2083 |
} |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2084 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2085 |
INSN(movi, 0, 0); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2086 |
INSN(orri, 0, 1); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2087 |
INSN(mvni, 1, 0); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2088 |
INSN(bici, 1, 1); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2089 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2090 |
#undef INSN |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2091 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2092 |
#define INSN(NAME, op1, op2, op3) \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2093 |
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2094 |
starti; \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2095 |
assert(T == T2S || T == T4S || T == T2D, "invalid arrangement"); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2096 |
f(0, 31), f((int)T & 1, 30), f(op1, 29), f(0b01110, 28, 24), f(op2, 23); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2097 |
f(T==T2D ? 1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2098 |
} |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2099 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2100 |
INSN(fadd, 0, 0, 0b110101); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2101 |
INSN(fdiv, 1, 0, 0b111111); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2102 |
INSN(fmul, 1, 0, 0b110111); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2103 |
INSN(fsub, 0, 1, 0b110101); |
29183 | 2104 |
|
2105 |
#undef INSN |
|
2106 |
||
2107 |
#define INSN(NAME, opc) \ |
|
2108 |
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ |
|
2109 |
starti; \ |
|
2110 |
assert(T == T4S, "arrangement must be T4S"); \ |
|
2111 |
f(0b01011110000, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0); \ |
|
2112 |
} |
|
2113 |
||
2114 |
INSN(sha1c, 0b000000); |
|
2115 |
INSN(sha1m, 0b001000); |
|
2116 |
INSN(sha1p, 0b000100); |
|
2117 |
INSN(sha1su0, 0b001100); |
|
2118 |
INSN(sha256h2, 0b010100); |
|
2119 |
INSN(sha256h, 0b010000); |
|
2120 |
INSN(sha256su1, 0b011000); |
|
2121 |
||
2122 |
#undef INSN |
|
2123 |
||
2124 |
#define INSN(NAME, opc) \ |
|
2125 |
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ |
|
2126 |
starti; \ |
|
2127 |
assert(T == T4S, "arrangement must be T4S"); \ |
|
2128 |
f(0b0101111000101000, 31, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0); \ |
|
2129 |
} |
|
2130 |
||
2131 |
INSN(sha1h, 0b000010); |
|
2132 |
INSN(sha1su1, 0b000110); |
|
2133 |
INSN(sha256su0, 0b001010); |
|
2134 |
||
2135 |
#undef INSN |
|
2136 |
||
2137 |
#define INSN(NAME, opc) \ |
|
2138 |
void NAME(FloatRegister Vd, FloatRegister Vn) { \ |
|
2139 |
starti; \ |
|
2140 |
f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0); \ |
|
2141 |
} |
|
2142 |
||
2143 |
INSN(aese, 0b0100111000101000010010); |
|
2144 |
INSN(aesd, 0b0100111000101000010110); |
|
2145 |
INSN(aesmc, 0b0100111000101000011010); |
|
2146 |
INSN(aesimc, 0b0100111000101000011110); |
|
2147 |
||
2148 |
#undef INSN |
|
2149 |
||
30890
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2150 |
// Inserts element sidx of Vn into element didx of Vd.
// T (any variant except Q) is used as a shift count:
//   bits 20:16 <- ((didx << 1) | 1) << T   (marker bit with the destination
//                                           index above it)
//   bits 14:11 <- sidx << T
void ins(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int didx, int sidx) {
  starti;
  assert(T != Q, "invalid register variant");

  const int dst_field = ((didx << 1) | 1) << (int)T;
  const int src_field = sidx << (int)T;

  f(0b01101110000, 31, 21);
  f(dst_field, 20, 16);
  f(0, 15);
  f(src_field, 14, 11);
  f(1, 10);
  rf(Vn, 5);
  rf(Vd, 0);
}
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2156 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2157 |
// Moves element idx of Vn into general register Rd.
//   bit 30     <- 1 when T is D, otherwise 0
//   bits 20:16 <- ((idx << 1) | 1) << T   (marker bit with the index above)
// Bits 29:21 hold 0b001110000 and bits 15:10 hold 0b001111.
void umov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {
  starti;

  const int wide      = T==D ? 1:0;
  const int idx_field = ((idx << 1) | 1) << (int)T;

  f(0, 31);
  f(wide, 30);
  f(0b001110000, 29, 21);
  f(idx_field, 20, 16);
  f(0b001111, 15, 10);
  rf(Vn, 5);
  rf(Rd, 0);
}
2163 |
||
30890
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2164 |
#define INSN(NAME, opc, opc2) \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2165 |
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2166 |
starti; \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2167 |
/* The encodings for the immh:immb fields (bits 22:16) are \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2168 |
* 0001 xxx 8B/16B, shift = xxx \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2169 |
* 001x xxx 4H/8H, shift = xxxx \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2170 |
* 01xx xxx 2S/4S, shift = xxxxx \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2171 |
* 1xxx xxx 1D/2D, shift = xxxxxx (1D is RESERVED) \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2172 |
*/ \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2173 |
assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value"); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2174 |
f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23), \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2175 |
f((1 << ((T>>1)+3))|shift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \ |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2176 |
} |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2177 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2178 |
INSN(shl, 0, 0b010101); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2179 |
INSN(sshr, 0, 0b000001); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2180 |
INSN(ushr, 1, 0b000001); |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2181 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2182 |
#undef INSN |
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2183 |
|
29183 | 2184 |
// Widening shift left: source arrangement Tb, destination arrangement Ta.
// Ta must be exactly one size step above Tb; it is only checked, never
// encoded. The low bit of Tb goes to bit 30.
//
// The encodings for the immh:immb fields (bits 22:16) are
//   0001 xxx       8H, 8B/16b shift = xxx
//   001x xxx       4S, 4H/8H  shift = xxxx
//   01xx xxx       2D, 2S/4S  shift = xxxxx
//   1xxx xxx       RESERVED
void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
  starti;
  assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement");
  assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value");

  const int immh_immb = (1 << ((Tb >> 1) + 3)) | shift;

  f(0, 31);
  f(Tb & 1, 30);
  f(0b1011110, 29, 23);
  f(immh_immb, 22, 16);
  f(0b101001, 15, 10);
  rf(Vn, 5);
  rf(Vd, 0);
}
|
2197 |
// Same encoder as ushll(); the arguments are forwarded unchanged, so the
// only thing distinguishing the two is the low bit of Tb that ushll() writes
// to bit 30. No check is made here that Tb has that bit set.
void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
  ushll(Vd, Ta, Vn, Tb, shift);
}
|
2200 |
||
2201 |
// Unzip encoder shared by uzp1 and uzp2.
//   bit 30     <- low bit of T     bits 23:22 <- T >> 1
//   bit 14     <- op (0 by default; uzp2 passes 1)
// Bits 29:24 hold 0b001110, bits 21 and 15 are zero, bits 13:10 hold 0b0110.
void uzp1(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T, int op = 0) {
  starti;
  f(0, 31);
  f((T & 0x1), 30);
  f(0b001110, 29, 24);
  f((T >> 1), 23, 22);
  f(0, 21);
  rf(Vm, 16);
  f(0, 15);
  f(op, 14);
  f(0b0110, 13, 10);
  rf(Vn, 5);
  rf(Vd, 0);
}
|
2206 |
// uzp2 is uzp1's encoding with the op bit (bit 14) set.
void uzp2(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T) {
  const int op = 1;
  uzp1(Vd, Vn, Vm, T, op);
}
|
2209 |
||
2210 |
// Move from general purpose register |
|
2211 |
// mov Vd.T[index], Rn |
|
2212 |
void mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) { |
|
2213 |
starti; |
|
2214 |
f(0b01001110000, 31, 21), f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16); |
|
2215 |
f(0b000111, 15, 10), rf(Xn, 5), rf(Vd, 0); |
|
2216 |
} |
|
2217 |
||
2218 |
// Move to general purpose register |
|
2219 |
// mov Rd, Vn.T[index] |
|
2220 |
void mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) { |
|
2221 |
starti; |
|
2222 |
f(0, 31), f((T >= T1D) ? 1:0, 30), f(0b001110000, 29, 21); |
|
2223 |
f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16); |
|
2224 |
f(0b001111, 15, 10), rf(Vn, 5), rf(Xd, 0); |
|
2225 |
} |
|
2226 |
||
2227 |
// Polynomial multiply long. Accepted arrangement pairs:
//   Ta == T1Q with Tb == T1D or T2D   -> size field 0b11
//   Ta == T8H with Tb == T8B or T16B  -> size field 0b00
// The low bit of Tb goes to bit 30 and the size field to bits 23:22.
void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
  starti;
  assert((Ta == T1Q && (Tb == T1D || Tb == T2D)) ||
         (Ta == T8H && (Tb == T8B || Tb == T16B)), "Invalid Size specifier");

  int size_field = 0b00;
  if (Ta == T1Q) {
    size_field = 0b11;
  }

  f(0, 31);
  f(Tb & 1, 30);
  f(0b001110, 29, 24);
  f(size_field, 23, 22);
  f(1, 21);
  rf(Vm, 16);
  f(0b111000, 15, 10);
  rf(Vn, 5);
  rf(Vd, 0);
}
2235 |
// Variant of pmull() restricted to the T2D and T16B source arrangements;
// after that check the arguments are forwarded to pmull() unchanged.
void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
  assert(Tb == T2D || Tb == T16B, "pmull2 assumes T2D or T16B as the second size specifier");
  pmull(Vd, Ta, Vn, Vm, Tb);
}
|
2239 |
||
2240 |
// Narrowing move from arrangement Ta (source) to Tb (destination).
// The size of Tb must be below 3 and exactly one step below the size of Ta.
// The low bit of Tb goes to bit 30 and Tb's size to bits 23:22; Ta is only
// checked, never encoded.
void uqxtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
  starti;
  int size_b = (int)Tb >> 1;
  int size_a = (int)Ta >> 1;
  assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");

  f(0, 31);
  f(Tb & 1, 30);
  f(0b101110, 29, 24);
  f(size_b, 23, 22);
  f(0b100001010010, 21, 10);
  rf(Vn, 5);
  rf(Vd, 0);
}
|
2248 |
||
30890
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2249 |
// DUP (general): replicate general register Xs into every element of Vd.
//   Vd - destination vector register
//   T  - destination arrangement; bit 30 is its low bit and the imm5 field
//        (bits 20:16) is the one-hot value 1 << (T >> 1)
//   Xs - source general-purpose register
// T1D was already rejected as a reserved encoding. T1Q is rejected as well:
// DUP has no 1Q arrangement, and it would otherwise pass the check and yield
// an imm5 outside the four valid one-hot values.
// NOTE(review): this assumes T1Q is the enumerator after T2D, so T1Q >> 1 == 4
// — confirm against the SIMD_Arrangement declaration.
void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
{
  starti;
  assert(T != T1D && T != T1Q, "reserved encoding");
  f(0,31), f((int)T & 1, 30), f(0b001110000, 29, 21);
  f((1 << (T >> 1)), 20, 16), f(0b000011, 15, 10), rf(Xs, 5), rf(Vd, 0);
}
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2256 |
|
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2257 |
// DUP (element): replicate element `index` of Vn into every element of Vd.
//   Vd    - destination vector register
//   T     - destination arrangement; bit 30 is its low bit
//   Vn    - source vector register
//   index - source element number (default 0)
// imm5 (bits 20:16) holds a one-hot size marker at bit (T >> 1) with the
// element index packed into the bits above it.
// T1D was already rejected as a reserved encoding. T1Q is rejected as well,
// since DUP has no 1Q arrangement.
// NOTE(review): this assumes T1Q is the enumerator after T2D — confirm against
// the SIMD_Arrangement declaration.
void dup(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int index = 0)
{
  starti;
  assert(T != T1D && T != T1Q, "reserved encoding");
  const int size = (int)T >> 1;
  // The index has (4 - size) bits available above the size marker. Checking
  // it here also avoids left-shifting a negative value below.
  assert(index >= 0 && index < (16 >> size), "invalid element index");
  f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21);
  f(((1 << size) | (index << (size + 1))), 20, 16);
  f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
}
dbbc65d3cd40
8079565: aarch64: Add vectorization support for aarch64
enevill
parents:
30225
diff
changeset
|
2265 |
|
29183 | 2266 |
// CRC32 instructions |
31591
82134a118aea
8130687: aarch64: add support for hardware crc32c
enevill
parents:
31517
diff
changeset
|
2267 |
#define INSN(NAME, c, sf, sz) \ |
29183 | 2268 |
void NAME(Register Rd, Register Rn, Register Rm) { \ |
2269 |
starti; \ |
|
31591
82134a118aea
8130687: aarch64: add support for hardware crc32c
enevill
parents:
31517
diff
changeset
|
2270 |
f(sf, 31), f(0b0011010110, 30, 21), f(0b010, 15, 13), f(c, 12); \ |
82134a118aea
8130687: aarch64: add support for hardware crc32c
enevill
parents:
31517
diff
changeset
|
2271 |
f(sz, 11, 10), rf(Rm, 16), rf(Rn, 5), rf(Rd, 0); \ |
29183 | 2272 |
} |
2273 |
||
31591
82134a118aea
8130687: aarch64: add support for hardware crc32c
enevill
parents:
31517
diff
changeset
|
2274 |
INSN(crc32b, 0, 0, 0b00); |
82134a118aea
8130687: aarch64: add support for hardware crc32c
enevill
parents:
31517
diff
changeset
|
2275 |
INSN(crc32h, 0, 0, 0b01); |
82134a118aea
8130687: aarch64: add support for hardware crc32c
enevill
parents:
31517
diff
changeset
|
2276 |
INSN(crc32w, 0, 0, 0b10); |
82134a118aea
8130687: aarch64: add support for hardware crc32c
enevill
parents:
31517
diff
changeset
|
2277 |
INSN(crc32x, 0, 1, 0b11); |
82134a118aea
8130687: aarch64: add support for hardware crc32c
enevill
parents:
31517
diff
changeset
|
2278 |
INSN(crc32cb, 1, 0, 0b00); |
82134a118aea
8130687: aarch64: add support for hardware crc32c
enevill
parents:
31517
diff
changeset
|
2279 |
INSN(crc32ch, 1, 0, 0b01); |
82134a118aea
8130687: aarch64: add support for hardware crc32c
enevill
parents:
31517
diff
changeset
|
2280 |
INSN(crc32cw, 1, 0, 0b10); |
82134a118aea
8130687: aarch64: add support for hardware crc32c
enevill
parents:
31517
diff
changeset
|
2281 |
INSN(crc32cx, 1, 1, 0b11); |
29183 | 2282 |
|
2283 |
#undef INSN |
|
2284 |
||
31961
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2285 |
// Table vector lookup |
32574 | 2286 |
#define INSN(NAME, op) \ |
2287 |
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, unsigned registers, FloatRegister Vm) { \ |
|
2288 |
starti; \ |
|
2289 |
assert(T == T8B || T == T16B, "invalid arrangement"); \ |
|
2290 |
assert(0 < registers && registers <= 4, "invalid number of registers"); \ |
|
2291 |
f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21), rf(Vm, 16), f(0, 15); \ |
|
2292 |
f(registers - 1, 14, 13), f(op, 12),f(0b00, 11, 10), rf(Vn, 5), rf(Vd, 0); \ |
|
31961
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2293 |
} |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2294 |
|
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2295 |
INSN(tbl, 0); |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2296 |
INSN(tbx, 1); |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2297 |
|
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2298 |
#undef INSN |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2299 |
|
32574 | 2300 |
// AdvSIMD two-reg misc |
31961
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2301 |
#define INSN(NAME, U, opcode) \ |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2302 |
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2303 |
starti; \ |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2304 |
assert((ASSERTION), MSG); \ |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2305 |
f(0, 31), f((int)T & 1, 30), f(U, 29), f(0b01110, 28, 24); \ |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2306 |
f((int)(T >> 1), 23, 22), f(0b10000, 21, 17), f(opcode, 16, 12); \ |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2307 |
f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0); \ |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2308 |
} |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2309 |
|
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2310 |
#define MSG "invalid arrangement" |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2311 |
|
33085
32f5ee7f0ba8
8135231: aarch64: add support for vectorizing double precision sqrt
enevill
parents:
32574
diff
changeset
|
2312 |
#define ASSERTION (T == T2S || T == T4S || T == T2D) |
32f5ee7f0ba8
8135231: aarch64: add support for vectorizing double precision sqrt
enevill
parents:
32574
diff
changeset
|
2313 |
INSN(fsqrt, 1, 0b11111); |
33088
34fe49ecee13
8138583: aarch64: add support for vectorizing fabs/fneg
enevill
parents:
33085
diff
changeset
|
2314 |
INSN(fabs, 0, 0b01111); |
34fe49ecee13
8138583: aarch64: add support for vectorizing fabs/fneg
enevill
parents:
33085
diff
changeset
|
2315 |
INSN(fneg, 1, 0b01111); |
33085
32f5ee7f0ba8
8135231: aarch64: add support for vectorizing double precision sqrt
enevill
parents:
32574
diff
changeset
|
2316 |
#undef ASSERTION |
32f5ee7f0ba8
8135231: aarch64: add support for vectorizing double precision sqrt
enevill
parents:
32574
diff
changeset
|
2317 |
|
31961
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2318 |
#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S) |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2319 |
INSN(rev64, 0, 0b00000); |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2320 |
#undef ASSERTION |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2321 |
|
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2322 |
#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H) |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2323 |
INSN(rev32, 1, 0b00000); |
32574 | 2324 |
private: |
2325 |
INSN(_rbit, 1, 0b00101); |
|
2326 |
public: |
|
2327 |
||
31961
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2328 |
#undef ASSERTION |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2329 |
|
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2330 |
#define ASSERTION (T == T8B || T == T16B) |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2331 |
INSN(rev16, 0, 0b00001); |
32574 | 2332 |
// RBIT only allows T8B and T16B but encodes them oddly. Argh... |
2333 |
void rbit(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { |
|
2334 |
assert((ASSERTION), MSG); |
|
2335 |
_rbit(Vd, SIMD_Arrangement(T & 1 | 0b010), Vn); |
|
2336 |
} |
|
31961
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2337 |
#undef ASSERTION |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2338 |
|
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2339 |
#undef MSG |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2340 |
|
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2341 |
#undef INSN |
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2342 |
|
70adcff5840c
8131062: aarch64: add support for GHASH acceleration
enevill
parents:
31863
diff
changeset
|
2343 |
// EXT: emits the vector extract instruction.
//   Vd    - destination vector register
//   T     - T8B or T16B; its low bit goes to bit 30
//   Vn    - register encoded at bit 5
//   Vm    - register encoded at bit 16
//   index - value for bits 14:11; at most 7 for T8B and at most 15 for T16B
void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
{
  starti;
  assert(T == T8B || T == T16B, "invalid arrangement");
  assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value");

  f(0, 31);
  f((int)T & 1, 30);
  f(0b101110000, 29, 21);
  rf(Vm, 16);
  f(0, 15);
  f(index, 14, 11);
  f(0, 10);
  rf(Vn, 5);
  rf(Vd, 0);
}
29183 | 2352 |
|
2353 |
/* Simulator extensions to the ISA |
|
2354 |
||
2355 |
haltsim |
|
2356 |
||
2357 |
takes no arguments, causes the sim to enter a debug break and then |
|
2358 |
return from the simulator run() call with STATUS_HALT? The linking |
|
2359 |
code will call fatal() when it sees STATUS_HALT. |
|
2360 |
||
2361 |
blrt Xn, Wm |
|
2362 |
blrt Xn, #gpargs, #fpargs, #type |
|
2363 |
Xn holds the 64 bit x86 branch_address |
|
2364 |
call format is encoded either as immediate data in the call |
|
2365 |
or in register Wm. In the latter case |
|
2366 |
Wm[13..6] = #gpargs, |
|
2367 |
Wm[5..2] = #fpargs, |
|
2368 |
Wm[1,0] = #type |
|
2369 |
||
2370 |
calls the x86 code address 'branch_address' supplied in Xn passing |
|
2371 |
arguments taken from the general and floating point registers according |
|
2372 |
to the supplied counts 'gpargs' and 'fpargs'. may return a result in r0 |
|
2373 |
or v0 according to the return type '#type' where
|
2374 |
||
2375 |
address branch_address; |
|
2376 |
uimm4 gpargs; |
|
2377 |
uimm4 fpargs; |
|
2378 |
enum ReturnType type; |
|
2379 |
||
2380 |
enum ReturnType |
|
2381 |
{ |
|
2382 |
void_ret = 0, |
|
2383 |
int_ret = 1, |
|
2384 |
long_ret = 1, |
|
2385 |
obj_ret = 1, // i.e. same as long |
|
2386 |
float_ret = 2, |
|
2387 |
double_ret = 3 |
|
2388 |
} |
|
2389 |
||
2390 |
notify |
|
2391 |
||
2392 |
notifies the simulator of a transfer of control. instr[14:0] |
|
2393 |
identifies the type of change of control. |
|
2394 |
||
2395 |
0 ==> initial entry to a method. |
|
2396 |
||
2397 |
1 ==> return into a method from a submethod call. |
|
2398 |
||
2399 |
2 ==> exit out of Java method code. |
|
2400 |
||
2401 |
3 ==> start execution for a new bytecode. |
|
2402 |
||
2403 |
in cases 1 and 2 the simulator is expected to use a JVM callback to |
|
2404 |
identify the name of the specific method being executed. in case 3
|
2405 |
the simulator is expected to use a JVM callback to identify the |
|
2406 |
bytecode index. |
|
2407 |
||
2408 |
Instruction encodings |
|
2409 |
--------------------- |
|
2410 |
||
2411 |
These are encoded in the space with instr[28:25] = 00 which is |
|
2412 |
unallocated. Encodings are |
|
2413 |
||
2414 |
                     10987654321098765432109876543210
     PSEUDO_HALT   = 0b11100000000000000000000000000000
     PSEUDO_BLRT   = 0b11000000000000000_______________
     PSEUDO_BLRTR  = 0b1100000000000000100000__________
     PSEUDO_NOTIFY = 0b10100000000000000_______________
|
2419 |
||
2420 |
instr[31,29] = op1 : 111 ==> HALT, 110 ==> BLRT/BLRTR, 101 ==> NOTIFY |
|
2421 |
||
2422 |
for BLRT |
|
2423 |
instr[14,11] = #gpargs, instr[10,7] = #fpargs |
|
2424 |
instr[6,5] = #type, instr[4,0] = Rn |
|
2425 |
for BLRTR |
|
2426 |
instr[9,5] = Rm, instr[4,0] = Rn |
|
2427 |
for NOTIFY |
|
2428 |
instr[14:0] = type : 0 ==> entry, 1 ==> reentry, 2 ==> exit, 3 ==> bcstart |
|
2429 |
*/ |
|
2430 |
||
2431 |
// Values passed to notify(); they are written into instr[14:0] of the
// simulator's NOTIFY pseudo-instruction.
enum NotifyType {
  method_entry   = 0,  // initial entry to a method
  method_reentry = 1,  // return into a method from a submethod call
  method_exit    = 2,  // exit out of Java method code
  bytecode_start = 3   // start execution for a new bytecode
};
|
2432 |
||
2433 |
virtual void notify(int type) { |
|
2434 |
if (UseBuiltinSim) { |
|
2435 |
starti; |
|
2436 |
// 109 |
|
2437 |
f(0b101, 31, 29); |
|
2438 |
// 87654321098765 |
|
2439 |
f(0b00000000000000, 28, 15); |
|
2440 |
f(type, 14, 0); |
|
2441 |
} |
|
2442 |
} |
|
2443 |
||
2444 |
// Call through Rn with the call format given as immediates.
// With UseBuiltinSim set this emits the simulator BLRT pseudo-instruction:
//   instr[14:11] = gpargs, instr[10:7] = fpargs, instr[6:5] = type,
//   instr[4:0]   = Rn
// Otherwise it emits a plain blr(Rn) and the format arguments are unused.
void blrt(Register Rn, int gpargs, int fpargs, int type) {
  if (!UseBuiltinSim) {
    blr(Rn);
    return;
  }
  starti;
  f(0b110, 31, 29);   // op1 = 110 ==> BLRT/BLRTR
  f(0, 28, 25);
  f(0, 24, 15);       // bit 15 clear selects the immediate form
  f(gpargs, 14, 11);
  f(fpargs, 10, 7);
  f(type, 6, 5);
  rf(Rn, 0);
}
|
2459 |
||
2460 |
// Call through Rn with the call format held in register Rm.
// With UseBuiltinSim set this emits the simulator BLRTR pseudo-instruction:
//   instr[9:5] = Rm, instr[4:0] = Rn
// Otherwise it emits a plain blr(Rn) and Rm is unused.
void blrt(Register Rn, Register Rm) {
  if (!UseBuiltinSim) {
    blr(Rn);
    return;
  }
  starti;
  f(0b110, 31, 29);   // op1 = 110 ==> BLRT/BLRTR
  f(0, 28, 25);
  f(1, 24, 15);       // bit 15 set selects the register form
  f(0, 14, 10);
  rf(Rm, 5);
  rf(Rn, 0);
}
|
2475 |
||
2476 |
// Emits the simulator HALT pseudo-instruction: op1 = 111 in bits 31:29 and
// zero in every remaining bit. Unlike notify/blrt, there is no UseBuiltinSim
// check here.
void haltsim() {
  starti;
  f(0b111, 31, 29);
  f(0, 28, 27);
  f(0, 26, 0);
}
|
2483 |
||
2484 |
// Constructs an assembler over `code`; all set-up is delegated to the
// AbstractAssembler base class.
Assembler(CodeBuffer* code)
  : AbstractAssembler(code)
{}
|
2486 |
||
2487 |
// This hook is not expected to be called on this assembler: it reports
// ShouldNotCallThis(). The default-constructed return value only satisfies
// the signature.
virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset) {
  ShouldNotCallThis();
  return RegisterOrConstant();
}
|
2493 |
||
2494 |
// Stack overflow checking |
|
2495 |
virtual void bang_stack_with_offset(int offset); |
|
2496 |
||
2497 |
static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm); |
|
2498 |
static bool operand_valid_for_add_sub_immediate(long imm); |
|
2499 |
static bool operand_valid_for_float_immediate(double imm); |
|
2500 |
||
2501 |
void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0); |
|
2502 |
void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0); |
|
2503 |
}; |
|
2504 |
||
2505 |
// Combines two membar masks by OR-ing their underlying unsigned values.
inline Assembler::Membar_mask_bits operator|(Assembler::Membar_mask_bits a,
                                             Assembler::Membar_mask_bits b) {
  const unsigned combined = unsigned(a) | unsigned(b);
  return Assembler::Membar_mask_bits(combined);
}
|
2509 |
||
2510 |
// Destroying an Instruction_aarch64 calls emit() on the assembler it was
// created for, so the instruction is emitted when the object leaves scope.
Instruction_aarch64::~Instruction_aarch64()
{
  assem->emit();
}
|
2513 |
||
2514 |
#undef starti |
|
2515 |
||
2516 |
// Invert a condition |
|
2517 |
// Invert a condition by flipping the low bit of its encoding.
// NOTE(review): relies on each condition and its inverse differing only in
// bit 0 — confirm against the Condition enum.
inline const Assembler::Condition operator~(const Assembler::Condition cond) {
  const int inverted = int(cond) ^ 1;
  return Assembler::Condition(inverted);
}
|
2520 |
||
2521 |
class BiasedLockingCounters; |
|
2522 |
||
2523 |
extern "C" void das(uint64_t start, int len); |
|
2524 |
||
2525 |
#endif // CPU_AARCH64_VM_ASSEMBLER_AARCH64_HPP |