51436
|
1 |
/*
|
54084
|
2 |
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
|
51436
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 |
*
|
|
5 |
* This code is free software; you can redistribute it and/or modify it
|
|
6 |
* under the terms of the GNU General Public License version 2 only, as
|
|
7 |
* published by the Free Software Foundation.
|
|
8 |
*
|
|
9 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
10 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
11 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
12 |
* version 2 for more details (a copy is included in the LICENSE file that
|
|
13 |
* accompanied this code).
|
|
14 |
*
|
|
15 |
* You should have received a copy of the GNU General Public License version
|
|
16 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
17 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
18 |
*
|
|
19 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
20 |
* or visit www.oracle.com if you need additional information or have any
|
|
21 |
* questions.
|
|
22 |
*/
|
|
23 |
|
|
24 |
|
|
25 |
package org.graalvm.compiler.asm.amd64;
|
|
26 |
|
|
27 |
import static jdk.vm.ci.amd64.AMD64.MASK;
|
|
28 |
import static jdk.vm.ci.amd64.AMD64.XMM;
|
|
29 |
import static jdk.vm.ci.amd64.AMD64.r12;
|
|
30 |
import static jdk.vm.ci.amd64.AMD64.r13;
|
|
31 |
import static jdk.vm.ci.amd64.AMD64.rbp;
|
|
32 |
import static jdk.vm.ci.amd64.AMD64.rsp;
|
|
33 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
|
|
34 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B1;
|
|
35 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
|
|
36 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
|
|
37 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
|
|
38 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
|
58299
|
39 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L512;
|
51736
|
40 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
|
51436
|
41 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
|
|
42 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
|
|
43 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
|
|
44 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
|
|
45 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
|
|
46 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
|
|
47 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
|
|
48 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
|
|
49 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
|
|
50 |
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
|
|
51 |
import static org.graalvm.compiler.core.common.NumUtil.isByte;
|
|
52 |
|
|
53 |
import org.graalvm.compiler.asm.Assembler;
|
|
54 |
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
|
|
55 |
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
|
|
56 |
import org.graalvm.compiler.debug.GraalError;
|
|
57 |
|
|
58 |
import jdk.vm.ci.amd64.AMD64;
|
|
59 |
import jdk.vm.ci.amd64.AMD64.CPUFeature;
|
|
60 |
import jdk.vm.ci.amd64.AMD64Kind;
|
|
61 |
import jdk.vm.ci.code.Register;
|
52578
|
62 |
import jdk.vm.ci.code.Register.RegisterCategory;
|
51436
|
63 |
import jdk.vm.ci.code.TargetDescription;
|
|
64 |
import jdk.vm.ci.meta.PlatformKind;
|
|
65 |
|
|
66 |
/**
|
|
67 |
* This class implements an assembler that can encode most X86 instructions.
|
|
68 |
*/
|
|
69 |
public abstract class AMD64BaseAssembler extends Assembler {
|
|
70 |
|
|
71 |
/** Strategy object used by the {@code simdPrefix} methods to emit SIMD instruction prefixes. */
private final SIMDEncoder simdEncoder;
|
|
72 |
|
|
73 |
/**
 * Constructs an assembler for the AMD64 architecture.
 * <p>
 * The SIMD prefix encoder is chosen once, here: the VEX based encoder if the target supports
 * {@link CPUFeature#AVX}, the legacy SSE encoder otherwise.
 *
 * @param target description of the machine that code is assembled for
 */
public AMD64BaseAssembler(TargetDescription target) {
    super(target);
    simdEncoder = supports(CPUFeature.AVX) ? new VEXEncoderImpl() : new SSEEncoderImpl();
}
|
|
85 |
|
|
86 |
/**
|
|
87 |
* The x86 operand sizes.
|
|
88 |
*/
|
|
89 |
public enum OperandSize {
|
|
90 |
BYTE(1, AMD64Kind.BYTE) {
|
|
91 |
@Override
|
|
92 |
protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
|
|
93 |
assert imm == (byte) imm;
|
|
94 |
asm.emitByte(imm);
|
|
95 |
}
|
|
96 |
|
|
97 |
@Override
|
|
98 |
protected int immediateSize() {
|
|
99 |
return 1;
|
|
100 |
}
|
|
101 |
},
|
|
102 |
|
|
103 |
WORD(2, AMD64Kind.WORD, 0x66) {
|
|
104 |
@Override
|
|
105 |
protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
|
|
106 |
assert imm == (short) imm;
|
|
107 |
asm.emitShort(imm);
|
|
108 |
}
|
|
109 |
|
|
110 |
@Override
|
|
111 |
protected int immediateSize() {
|
|
112 |
return 2;
|
|
113 |
}
|
|
114 |
},
|
|
115 |
|
|
116 |
DWORD(4, AMD64Kind.DWORD) {
|
|
117 |
@Override
|
|
118 |
protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
|
|
119 |
asm.emitInt(imm);
|
|
120 |
}
|
|
121 |
|
|
122 |
@Override
|
|
123 |
protected int immediateSize() {
|
|
124 |
return 4;
|
|
125 |
}
|
|
126 |
},
|
|
127 |
|
|
128 |
QWORD(8, AMD64Kind.QWORD) {
|
|
129 |
@Override
|
|
130 |
protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
|
|
131 |
asm.emitInt(imm);
|
|
132 |
}
|
|
133 |
|
|
134 |
@Override
|
|
135 |
protected int immediateSize() {
|
|
136 |
return 4;
|
|
137 |
}
|
|
138 |
},
|
|
139 |
|
|
140 |
SS(4, AMD64Kind.SINGLE, 0xF3, true),
|
|
141 |
|
|
142 |
SD(8, AMD64Kind.DOUBLE, 0xF2, true),
|
|
143 |
|
|
144 |
PS(16, AMD64Kind.V128_SINGLE, true),
|
|
145 |
|
|
146 |
PD(16, AMD64Kind.V128_DOUBLE, 0x66, true);
|
|
147 |
|
|
148 |
private final int sizePrefix;
|
|
149 |
private final int bytes;
|
|
150 |
private final boolean xmm;
|
|
151 |
private final AMD64Kind kind;
|
|
152 |
|
|
153 |
OperandSize(int bytes, AMD64Kind kind) {
|
|
154 |
this(bytes, kind, 0);
|
|
155 |
}
|
|
156 |
|
|
157 |
OperandSize(int bytes, AMD64Kind kind, int sizePrefix) {
|
|
158 |
this(bytes, kind, sizePrefix, false);
|
|
159 |
}
|
|
160 |
|
|
161 |
OperandSize(int bytes, AMD64Kind kind, boolean xmm) {
|
|
162 |
this(bytes, kind, 0, xmm);
|
|
163 |
}
|
|
164 |
|
|
165 |
OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) {
|
|
166 |
this.sizePrefix = sizePrefix;
|
|
167 |
this.bytes = bytes;
|
|
168 |
this.kind = kind;
|
|
169 |
this.xmm = xmm;
|
|
170 |
}
|
|
171 |
|
|
172 |
public int getSizePrefix() {
|
|
173 |
return sizePrefix;
|
|
174 |
}
|
|
175 |
|
|
176 |
public int getBytes() {
|
|
177 |
return bytes;
|
|
178 |
}
|
|
179 |
|
|
180 |
public boolean isXmmType() {
|
|
181 |
return xmm;
|
|
182 |
}
|
|
183 |
|
|
184 |
public AMD64Kind getKind() {
|
|
185 |
return kind;
|
|
186 |
}
|
|
187 |
|
|
188 |
public static OperandSize get(PlatformKind kind) {
|
|
189 |
for (OperandSize operandSize : OperandSize.values()) {
|
|
190 |
if (operandSize.kind.equals(kind)) {
|
|
191 |
return operandSize;
|
|
192 |
}
|
|
193 |
}
|
|
194 |
throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString());
|
|
195 |
}
|
|
196 |
|
|
197 |
/**
|
|
198 |
* Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
|
|
199 |
* as sign-extended 32-bit values.
|
|
200 |
*
|
|
201 |
* @param asm
|
|
202 |
* @param imm
|
|
203 |
*/
|
|
204 |
protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
|
|
205 |
throw new UnsupportedOperationException();
|
|
206 |
}
|
|
207 |
|
|
208 |
protected int immediateSize() {
|
|
209 |
throw new UnsupportedOperationException();
|
|
210 |
}
|
|
211 |
}
|
|
212 |
|
54084
|
213 |
public static class OperandDataAnnotation extends CodeAnnotation {
|
51436
|
214 |
/**
|
|
215 |
* The position (bytes from the beginning of the method) of the operand.
|
|
216 |
*/
|
|
217 |
public final int operandPosition;
|
|
218 |
/**
|
|
219 |
* The size of the operand, in bytes.
|
|
220 |
*/
|
|
221 |
public final int operandSize;
|
|
222 |
/**
|
|
223 |
* The position (bytes from the beginning of the method) of the next instruction. On AMD64,
|
|
224 |
* RIP-relative operands are relative to this position.
|
|
225 |
*/
|
|
226 |
public final int nextInstructionPosition;
|
|
227 |
|
|
228 |
OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
|
|
229 |
super(instructionPosition);
|
|
230 |
|
|
231 |
this.operandPosition = operandPosition;
|
|
232 |
this.operandSize = operandSize;
|
|
233 |
this.nextInstructionPosition = nextInstructionPosition;
|
|
234 |
}
|
|
235 |
|
|
236 |
@Override
|
|
237 |
public String toString() {
|
|
238 |
return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
|
|
239 |
}
|
|
240 |
}
|
|
241 |
|
|
242 |
protected void annotatePatchingImmediate(int operandOffset, int operandSize) {
|
|
243 |
if (codePatchingAnnotationConsumer != null) {
|
|
244 |
int pos = position();
|
54084
|
245 |
codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(pos, pos + operandOffset, operandSize, pos + operandOffset + operandSize));
|
51436
|
246 |
}
|
|
247 |
}
|
|
248 |
|
|
249 |
public final boolean supports(CPUFeature feature) {
|
|
250 |
return ((AMD64) target.arch).getFeatures().contains(feature);
|
|
251 |
}
|
|
252 |
|
52578
|
253 |
protected static boolean inRC(RegisterCategory rc, Register r) {
|
|
254 |
return r.getRegisterCategory().equals(rc);
|
|
255 |
}
|
|
256 |
|
51436
|
257 |
protected static int encode(Register r) {
|
52578
|
258 |
assert r.encoding >= 0 && (inRC(XMM, r) ? r.encoding < 32 : r.encoding < 16) : "encoding out of range: " + r.encoding;
|
51436
|
259 |
return r.encoding & 0x7;
|
|
260 |
}
|
|
261 |
|
|
262 |
/** Smallest register encoding for which {@code needsRex} reports that a REX prefix is required. */
private static final int MinEncodingNeedsRex = 8;
|
|
263 |
|
|
264 |
/**
|
|
265 |
* Constants for X86 prefix bytes.
|
|
266 |
*/
|
|
267 |
private static class Prefix {
|
|
268 |
private static final int REX = 0x40;
|
|
269 |
private static final int REXB = 0x41;
|
|
270 |
private static final int REXX = 0x42;
|
|
271 |
private static final int REXXB = 0x43;
|
|
272 |
private static final int REXR = 0x44;
|
|
273 |
private static final int REXRB = 0x45;
|
|
274 |
private static final int REXRX = 0x46;
|
|
275 |
private static final int REXRXB = 0x47;
|
|
276 |
private static final int REXW = 0x48;
|
|
277 |
private static final int REXWB = 0x49;
|
|
278 |
private static final int REXWX = 0x4A;
|
|
279 |
private static final int REXWXB = 0x4B;
|
|
280 |
private static final int REXWR = 0x4C;
|
|
281 |
private static final int REXWRB = 0x4D;
|
|
282 |
private static final int REXWRX = 0x4E;
|
|
283 |
private static final int REXWRXB = 0x4F;
|
52578
|
284 |
|
|
285 |
private static final int VEX2 = 0xC5;
|
|
286 |
private static final int VEX3 = 0xC4;
|
|
287 |
private static final int EVEX = 0x62;
|
51436
|
288 |
}
|
|
289 |
|
|
290 |
protected final void rexw() {
|
|
291 |
emitByte(Prefix.REXW);
|
|
292 |
}
|
|
293 |
|
|
294 |
protected final void prefix(Register reg) {
|
|
295 |
prefix(reg, false);
|
|
296 |
}
|
|
297 |
|
|
298 |
protected final void prefix(Register reg, boolean byteinst) {
|
|
299 |
int regEnc = reg.encoding;
|
|
300 |
if (regEnc >= 8) {
|
|
301 |
emitByte(Prefix.REXB);
|
|
302 |
} else if (byteinst && regEnc >= 4) {
|
|
303 |
emitByte(Prefix.REX);
|
|
304 |
}
|
|
305 |
}
|
|
306 |
|
|
307 |
protected final void prefixq(Register reg) {
|
|
308 |
if (reg.encoding < 8) {
|
|
309 |
emitByte(Prefix.REXW);
|
|
310 |
} else {
|
|
311 |
emitByte(Prefix.REXWB);
|
|
312 |
}
|
|
313 |
}
|
|
314 |
|
|
315 |
protected final void prefix(Register dst, Register src) {
|
|
316 |
prefix(dst, false, src, false);
|
|
317 |
}
|
|
318 |
|
|
319 |
protected final void prefix(Register dst, boolean dstIsByte, Register src, boolean srcIsByte) {
|
|
320 |
int dstEnc = dst.encoding;
|
|
321 |
int srcEnc = src.encoding;
|
|
322 |
if (dstEnc < 8) {
|
|
323 |
if (srcEnc >= 8) {
|
|
324 |
emitByte(Prefix.REXB);
|
|
325 |
} else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
|
|
326 |
emitByte(Prefix.REX);
|
|
327 |
}
|
|
328 |
} else {
|
|
329 |
if (srcEnc < 8) {
|
|
330 |
emitByte(Prefix.REXR);
|
|
331 |
} else {
|
|
332 |
emitByte(Prefix.REXRB);
|
|
333 |
}
|
|
334 |
}
|
|
335 |
}
|
|
336 |
|
|
337 |
/**
|
|
338 |
* Creates prefix for the operands. If the given operands exceed 3 bits, the 4th bit is encoded
|
|
339 |
* in the prefix.
|
|
340 |
*/
|
|
341 |
protected final void prefixq(Register reg, Register rm) {
|
|
342 |
int regEnc = reg.encoding;
|
|
343 |
int rmEnc = rm.encoding;
|
|
344 |
if (regEnc < 8) {
|
|
345 |
if (rmEnc < 8) {
|
|
346 |
emitByte(Prefix.REXW);
|
|
347 |
} else {
|
|
348 |
emitByte(Prefix.REXWB);
|
|
349 |
}
|
|
350 |
} else {
|
|
351 |
if (rmEnc < 8) {
|
|
352 |
emitByte(Prefix.REXWR);
|
|
353 |
} else {
|
|
354 |
emitByte(Prefix.REXWRB);
|
|
355 |
}
|
|
356 |
}
|
|
357 |
}
|
|
358 |
|
|
359 |
private static boolean needsRex(Register reg) {
|
|
360 |
return reg.encoding >= MinEncodingNeedsRex;
|
|
361 |
}
|
|
362 |
|
|
363 |
protected final void prefix(AMD64Address adr) {
|
|
364 |
if (needsRex(adr.getBase())) {
|
|
365 |
if (needsRex(adr.getIndex())) {
|
|
366 |
emitByte(Prefix.REXXB);
|
|
367 |
} else {
|
|
368 |
emitByte(Prefix.REXB);
|
|
369 |
}
|
|
370 |
} else {
|
|
371 |
if (needsRex(adr.getIndex())) {
|
|
372 |
emitByte(Prefix.REXX);
|
|
373 |
}
|
|
374 |
}
|
|
375 |
}
|
|
376 |
|
|
377 |
protected final void prefixq(AMD64Address adr) {
|
|
378 |
if (needsRex(adr.getBase())) {
|
|
379 |
if (needsRex(adr.getIndex())) {
|
|
380 |
emitByte(Prefix.REXWXB);
|
|
381 |
} else {
|
|
382 |
emitByte(Prefix.REXWB);
|
|
383 |
}
|
|
384 |
} else {
|
|
385 |
if (needsRex(adr.getIndex())) {
|
|
386 |
emitByte(Prefix.REXWX);
|
|
387 |
} else {
|
|
388 |
emitByte(Prefix.REXW);
|
|
389 |
}
|
|
390 |
}
|
|
391 |
}
|
|
392 |
|
|
393 |
protected void prefixb(AMD64Address adr, Register reg) {
|
|
394 |
prefix(adr, reg, true);
|
|
395 |
}
|
|
396 |
|
|
397 |
protected void prefix(AMD64Address adr, Register reg) {
|
|
398 |
prefix(adr, reg, false);
|
|
399 |
}
|
|
400 |
|
|
401 |
protected void prefix(AMD64Address adr, Register reg, boolean byteinst) {
|
|
402 |
if (reg.encoding < 8) {
|
|
403 |
if (needsRex(adr.getBase())) {
|
|
404 |
if (needsRex(adr.getIndex())) {
|
|
405 |
emitByte(Prefix.REXXB);
|
|
406 |
} else {
|
|
407 |
emitByte(Prefix.REXB);
|
|
408 |
}
|
|
409 |
} else {
|
|
410 |
if (needsRex(adr.getIndex())) {
|
|
411 |
emitByte(Prefix.REXX);
|
|
412 |
} else if (byteinst && reg.encoding >= 4) {
|
|
413 |
emitByte(Prefix.REX);
|
|
414 |
}
|
|
415 |
}
|
|
416 |
} else {
|
|
417 |
if (needsRex(adr.getBase())) {
|
|
418 |
if (needsRex(adr.getIndex())) {
|
|
419 |
emitByte(Prefix.REXRXB);
|
|
420 |
} else {
|
|
421 |
emitByte(Prefix.REXRB);
|
|
422 |
}
|
|
423 |
} else {
|
|
424 |
if (needsRex(adr.getIndex())) {
|
|
425 |
emitByte(Prefix.REXRX);
|
|
426 |
} else {
|
|
427 |
emitByte(Prefix.REXR);
|
|
428 |
}
|
|
429 |
}
|
|
430 |
}
|
|
431 |
}
|
|
432 |
|
|
433 |
protected void prefixq(AMD64Address adr, Register src) {
|
|
434 |
if (src.encoding < 8) {
|
|
435 |
if (needsRex(adr.getBase())) {
|
|
436 |
if (needsRex(adr.getIndex())) {
|
|
437 |
emitByte(Prefix.REXWXB);
|
|
438 |
} else {
|
|
439 |
emitByte(Prefix.REXWB);
|
|
440 |
}
|
|
441 |
} else {
|
|
442 |
if (needsRex(adr.getIndex())) {
|
|
443 |
emitByte(Prefix.REXWX);
|
|
444 |
} else {
|
|
445 |
emitByte(Prefix.REXW);
|
|
446 |
}
|
|
447 |
}
|
|
448 |
} else {
|
|
449 |
if (needsRex(adr.getBase())) {
|
|
450 |
if (needsRex(adr.getIndex())) {
|
|
451 |
emitByte(Prefix.REXWRXB);
|
|
452 |
} else {
|
|
453 |
emitByte(Prefix.REXWRB);
|
|
454 |
}
|
|
455 |
} else {
|
|
456 |
if (needsRex(adr.getIndex())) {
|
|
457 |
emitByte(Prefix.REXWRX);
|
|
458 |
} else {
|
|
459 |
emitByte(Prefix.REXWR);
|
|
460 |
}
|
|
461 |
}
|
|
462 |
}
|
|
463 |
}
|
|
464 |
|
|
465 |
/**
|
|
466 |
* Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
|
|
467 |
* register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
|
|
468 |
* field. The X bit must be 0.
|
|
469 |
*/
|
|
470 |
protected static int getRXB(Register reg, Register rm) {
|
|
471 |
int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
|
|
472 |
rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
|
|
473 |
return rxb;
|
|
474 |
}
|
|
475 |
|
|
476 |
/**
|
|
477 |
* Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
|
|
478 |
* are two cases for the memory operand:<br>
|
|
479 |
* ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
|
|
480 |
* <br>
|
|
481 |
* There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
|
|
482 |
*/
|
|
483 |
protected static int getRXB(Register reg, AMD64Address rm) {
|
|
484 |
int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
|
|
485 |
if (!rm.getIndex().equals(Register.None)) {
|
|
486 |
rxb |= (rm.getIndex().encoding & 0x08) >> 2;
|
|
487 |
}
|
|
488 |
if (!rm.getBase().equals(Register.None)) {
|
|
489 |
rxb |= (rm.getBase().encoding & 0x08) >> 3;
|
|
490 |
}
|
|
491 |
return rxb;
|
|
492 |
}
|
|
493 |
|
|
494 |
/**
|
|
495 |
* Emit the ModR/M byte for one register operand and an opcode extension in the R field.
|
|
496 |
* <p>
|
|
497 |
* Format: [ 11 reg r/m ]
|
|
498 |
*/
|
|
499 |
protected final void emitModRM(int reg, Register rm) {
|
|
500 |
assert (reg & 0x07) == reg;
|
|
501 |
emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
|
|
502 |
}
|
|
503 |
|
|
504 |
/**
|
|
505 |
* Emit the ModR/M byte for two register operands.
|
|
506 |
* <p>
|
|
507 |
* Format: [ 11 reg r/m ]
|
|
508 |
*/
|
|
509 |
protected final void emitModRM(Register reg, Register rm) {
|
|
510 |
emitModRM(reg.encoding & 0x07, rm);
|
|
511 |
}
|
|
512 |
|
58299
|
513 |
/**
 * Displacement scale passed by the {@code emitOperandHelper} overloads that take no
 * {@code evexDisp8Scale} argument; with this value one-byte displacements are emitted unscaled.
 */
public static final int DEFAULT_DISP8_SCALE = 1;
|
|
514 |
|
51436
|
515 |
/**
|
|
516 |
* Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
|
|
517 |
*
|
|
518 |
* @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
|
|
519 |
*/
|
|
520 |
protected final void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
|
|
521 |
assert !reg.equals(Register.None);
|
58299
|
522 |
emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize, DEFAULT_DISP8_SCALE);
|
51436
|
523 |
}
|
|
524 |
|
|
525 |
protected final void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
|
58299
|
526 |
emitOperandHelper(reg, addr, false, additionalInstructionSize, DEFAULT_DISP8_SCALE);
|
51436
|
527 |
}
|
|
528 |
|
|
529 |
protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
|
|
530 |
assert !reg.equals(Register.None);
|
58299
|
531 |
emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, DEFAULT_DISP8_SCALE);
|
51436
|
532 |
}
|
|
533 |
|
58299
|
534 |
protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize, int evexDisp8Scale) {
|
51436
|
535 |
assert !reg.equals(Register.None);
|
|
536 |
emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, evexDisp8Scale);
|
|
537 |
}
|
|
538 |
|
|
539 |
/**
|
|
540 |
* Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
|
|
541 |
* extension in the R field.
|
|
542 |
*
|
|
543 |
* @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
|
|
544 |
* @param additionalInstructionSize the number of bytes that will be emitted after the operand,
|
|
545 |
* so that the start position of the next instruction can be computed even though
|
|
546 |
* this instruction has not been completely emitted yet.
|
|
547 |
* @param evexDisp8Scale the scaling factor for computing the compressed displacement of
|
|
548 |
* EVEX-encoded instructions. This scaling factor only matters when the emitted
|
|
549 |
* instruction uses one-byte-displacement form.
|
|
550 |
*/
|
|
551 |
private void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize, int evexDisp8Scale) {
|
|
552 |
assert (reg & 0x07) == reg;
|
|
553 |
int regenc = reg << 3;
|
|
554 |
|
|
555 |
Register base = addr.getBase();
|
|
556 |
Register index = addr.getIndex();
|
|
557 |
|
|
558 |
Scale scale = addr.getScale();
|
|
559 |
int disp = addr.getDisplacement();
|
|
560 |
|
|
561 |
if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
|
|
562 |
// [00 000 101] disp32
|
|
563 |
assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
|
|
564 |
emitByte(0x05 | regenc);
|
|
565 |
if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
|
54084
|
566 |
codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
|
51436
|
567 |
}
|
|
568 |
emitInt(disp);
|
|
569 |
} else if (base.isValid()) {
|
|
570 |
boolean overriddenForce4Byte = force4Byte;
|
|
571 |
int baseenc = base.isValid() ? encode(base) : 0;
|
|
572 |
|
|
573 |
if (index.isValid()) {
|
|
574 |
int indexenc = encode(index) << 3;
|
|
575 |
// [base + indexscale + disp]
|
|
576 |
if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
|
|
577 |
// [base + indexscale]
|
|
578 |
// [00 reg 100][ss index base]
|
|
579 |
assert !index.equals(rsp) : "illegal addressing mode";
|
|
580 |
emitByte(0x04 | regenc);
|
|
581 |
emitByte(scale.log2 << 6 | indexenc | baseenc);
|
|
582 |
} else {
|
|
583 |
if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
|
|
584 |
if (disp % evexDisp8Scale == 0) {
|
|
585 |
int newDisp = disp / evexDisp8Scale;
|
|
586 |
if (isByte(newDisp)) {
|
|
587 |
disp = newDisp;
|
|
588 |
assert isByte(disp) && !overriddenForce4Byte;
|
|
589 |
}
|
|
590 |
} else {
|
|
591 |
overriddenForce4Byte = true;
|
|
592 |
}
|
|
593 |
}
|
|
594 |
if (isByte(disp) && !overriddenForce4Byte) {
|
|
595 |
// [base + indexscale + imm8]
|
|
596 |
// [01 reg 100][ss index base] imm8
|
|
597 |
assert !index.equals(rsp) : "illegal addressing mode";
|
|
598 |
emitByte(0x44 | regenc);
|
|
599 |
emitByte(scale.log2 << 6 | indexenc | baseenc);
|
|
600 |
emitByte(disp & 0xFF);
|
|
601 |
} else {
|
|
602 |
// [base + indexscale + disp32]
|
|
603 |
// [10 reg 100][ss index base] disp32
|
|
604 |
assert !index.equals(rsp) : "illegal addressing mode";
|
|
605 |
emitByte(0x84 | regenc);
|
|
606 |
emitByte(scale.log2 << 6 | indexenc | baseenc);
|
|
607 |
emitInt(disp);
|
|
608 |
}
|
|
609 |
}
|
|
610 |
} else if (base.equals(rsp) || base.equals(r12)) {
|
|
611 |
// [rsp + disp]
|
|
612 |
if (disp == 0) {
|
|
613 |
// [rsp]
|
|
614 |
// [00 reg 100][00 100 100]
|
|
615 |
emitByte(0x04 | regenc);
|
|
616 |
emitByte(0x24);
|
|
617 |
} else {
|
|
618 |
if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
|
|
619 |
if (disp % evexDisp8Scale == 0) {
|
|
620 |
int newDisp = disp / evexDisp8Scale;
|
|
621 |
if (isByte(newDisp)) {
|
|
622 |
disp = newDisp;
|
|
623 |
assert isByte(disp) && !overriddenForce4Byte;
|
|
624 |
}
|
|
625 |
} else {
|
|
626 |
overriddenForce4Byte = true;
|
|
627 |
}
|
|
628 |
}
|
|
629 |
if (isByte(disp) && !overriddenForce4Byte) {
|
|
630 |
// [rsp + imm8]
|
|
631 |
// [01 reg 100][00 100 100] disp8
|
|
632 |
emitByte(0x44 | regenc);
|
|
633 |
emitByte(0x24);
|
|
634 |
emitByte(disp & 0xFF);
|
|
635 |
} else {
|
|
636 |
// [rsp + imm32]
|
|
637 |
// [10 reg 100][00 100 100] disp32
|
|
638 |
emitByte(0x84 | regenc);
|
|
639 |
emitByte(0x24);
|
|
640 |
emitInt(disp);
|
|
641 |
}
|
|
642 |
}
|
|
643 |
} else {
|
|
644 |
// [base + disp]
|
|
645 |
assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
|
|
646 |
if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
|
|
647 |
// [base]
|
|
648 |
// [00 reg base]
|
|
649 |
emitByte(0x00 | regenc | baseenc);
|
|
650 |
} else {
|
|
651 |
if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
|
|
652 |
if (disp % evexDisp8Scale == 0) {
|
|
653 |
int newDisp = disp / evexDisp8Scale;
|
|
654 |
if (isByte(newDisp)) {
|
|
655 |
disp = newDisp;
|
|
656 |
assert isByte(disp) && !overriddenForce4Byte;
|
|
657 |
}
|
|
658 |
} else {
|
|
659 |
overriddenForce4Byte = true;
|
|
660 |
}
|
|
661 |
}
|
|
662 |
if (isByte(disp) && !overriddenForce4Byte) {
|
|
663 |
// [base + disp8]
|
|
664 |
// [01 reg base] disp8
|
|
665 |
emitByte(0x40 | regenc | baseenc);
|
|
666 |
emitByte(disp & 0xFF);
|
|
667 |
} else {
|
|
668 |
// [base + disp32]
|
|
669 |
// [10 reg base] disp32
|
|
670 |
emitByte(0x80 | regenc | baseenc);
|
|
671 |
emitInt(disp);
|
|
672 |
}
|
|
673 |
}
|
|
674 |
}
|
|
675 |
} else {
|
|
676 |
if (index.isValid()) {
|
|
677 |
int indexenc = encode(index) << 3;
|
|
678 |
// [indexscale + disp]
|
|
679 |
// [00 reg 100][ss index 101] disp32
|
|
680 |
assert !index.equals(rsp) : "illegal addressing mode";
|
|
681 |
emitByte(0x04 | regenc);
|
|
682 |
emitByte(scale.log2 << 6 | indexenc | 0x05);
|
|
683 |
emitInt(disp);
|
|
684 |
} else {
|
|
685 |
// [disp] ABSOLUTE
|
|
686 |
// [00 reg 100][00 100 101] disp32
|
|
687 |
emitByte(0x04 | regenc);
|
|
688 |
emitByte(0x25);
|
|
689 |
emitInt(disp);
|
|
690 |
}
|
|
691 |
}
|
|
692 |
}
|
|
693 |
|
|
694 |
private interface SIMDEncoder {
|
|
695 |
|
|
696 |
void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);
|
|
697 |
|
|
698 |
void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);
|
|
699 |
|
|
700 |
}
|
|
701 |
|
|
702 |
private class SSEEncoderImpl implements SIMDEncoder {
|
|
703 |
|
|
704 |
@Override
|
|
705 |
public void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
|
52910
|
706 |
assert (!nds.isValid()) || nds.equals(xreg);
|
51436
|
707 |
if (sizePrefix > 0) {
|
|
708 |
emitByte(sizePrefix);
|
|
709 |
}
|
|
710 |
if (isRexW) {
|
|
711 |
prefixq(adr, xreg);
|
|
712 |
} else {
|
|
713 |
prefix(adr, xreg);
|
|
714 |
}
|
|
715 |
if (opcodeEscapePrefix > 0xFF) {
|
|
716 |
emitShort(opcodeEscapePrefix);
|
|
717 |
} else if (opcodeEscapePrefix > 0) {
|
|
718 |
emitByte(opcodeEscapePrefix);
|
|
719 |
}
|
|
720 |
}
|
|
721 |
|
|
722 |
@Override
|
|
723 |
public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
|
52910
|
724 |
assert (!nds.isValid()) || nds.equals(dst) || nds.equals(src);
|
51436
|
725 |
if (sizePrefix > 0) {
|
|
726 |
emitByte(sizePrefix);
|
|
727 |
}
|
|
728 |
if (isRexW) {
|
|
729 |
prefixq(dst, src);
|
|
730 |
} else {
|
|
731 |
prefix(dst, src);
|
|
732 |
}
|
|
733 |
if (opcodeEscapePrefix > 0xFF) {
|
|
734 |
emitShort(opcodeEscapePrefix);
|
|
735 |
} else if (opcodeEscapePrefix > 0) {
|
|
736 |
emitByte(opcodeEscapePrefix);
|
|
737 |
}
|
|
738 |
}
|
|
739 |
}
|
|
740 |
|
|
741 |
public static final class VEXPrefixConfig {
|
|
742 |
public static final int L128 = 0;
|
|
743 |
public static final int L256 = 1;
|
58299
|
744 |
public static final int L512 = 2;
|
51736
|
745 |
public static final int LZ = 0;
|
51436
|
746 |
|
|
747 |
public static final int W0 = 0;
|
|
748 |
public static final int W1 = 1;
|
|
749 |
public static final int WIG = 0;
|
|
750 |
|
|
751 |
public static final int P_ = 0x0;
|
|
752 |
public static final int P_66 = 0x1;
|
|
753 |
public static final int P_F3 = 0x2;
|
|
754 |
public static final int P_F2 = 0x3;
|
|
755 |
|
|
756 |
public static final int M_0F = 0x1;
|
|
757 |
public static final int M_0F38 = 0x2;
|
|
758 |
public static final int M_0F3A = 0x3;
|
|
759 |
|
|
760 |
private VEXPrefixConfig() {
|
|
761 |
}
|
|
762 |
}
|
|
763 |
|
|
764 |
private class VEXEncoderImpl implements SIMDEncoder {
|
|
765 |
|
|
766 |
private int sizePrefixToPP(int sizePrefix) {
|
|
767 |
switch (sizePrefix) {
|
|
768 |
case 0x66:
|
|
769 |
return P_66;
|
|
770 |
case 0xF2:
|
|
771 |
return P_F2;
|
|
772 |
case 0xF3:
|
|
773 |
return P_F3;
|
|
774 |
default:
|
|
775 |
return P_;
|
|
776 |
}
|
|
777 |
}
|
|
778 |
|
|
779 |
private int opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix) {
|
|
780 |
switch (opcodeEscapePrefix) {
|
|
781 |
case 0x0F:
|
|
782 |
return M_0F;
|
|
783 |
case 0x380F:
|
|
784 |
return M_0F38;
|
|
785 |
case 0x3A0F:
|
|
786 |
return M_0F3A;
|
|
787 |
default:
|
|
788 |
return 0;
|
|
789 |
}
|
|
790 |
}
|
|
791 |
|
|
792 |
@Override
|
|
793 |
public void simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
|
52578
|
794 |
assert reg.encoding < 16 : "encoding out of range: " + reg.encoding;
|
|
795 |
assert nds.encoding < 16 : "encoding out of range: " + nds.encoding;
|
|
796 |
emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(reg, rm), nds.isValid() ? nds.encoding : 0, true);
|
51436
|
797 |
}
|
|
798 |
|
|
799 |
@Override
|
|
800 |
public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
|
52578
|
801 |
assert dst.encoding < 16 : "encoding out of range: " + dst.encoding;
|
|
802 |
assert src.encoding < 16 : "encoding out of range: " + src.encoding;
|
|
803 |
assert nds.encoding < 16 : "encoding out of range: " + nds.encoding;
|
|
804 |
emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(dst, src), nds.isValid() ? nds.encoding : 0, true);
|
51436
|
805 |
}
|
|
806 |
}
|
|
807 |
|
|
808 |
protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
|
|
809 |
simdEncoder.simdPrefix(xreg, nds, adr, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
|
|
810 |
}
|
|
811 |
|
|
812 |
protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
|
|
813 |
simdEncoder.simdPrefix(xreg, nds, adr, size.sizePrefix, opcodeEscapePrefix, isRexW);
|
|
814 |
}
|
|
815 |
|
|
816 |
protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
|
|
817 |
simdEncoder.simdPrefix(dst, nds, src, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
|
|
818 |
}
|
|
819 |
|
|
820 |
protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
|
|
821 |
simdEncoder.simdPrefix(dst, nds, src, size.sizePrefix, opcodeEscapePrefix, isRexW);
|
|
822 |
}
|
|
823 |
|
52578
|
824 |
// @formatter:off
|
|
825 |
//
|
|
826 |
// Instruction Format and VEX illustrated below (optional []):
|
|
827 |
//
|
|
828 |
// #of bytes: 2,3 1 1 1 1,2,4 1
|
|
829 |
// [Prefixes] VEX OpCode ModR/M [SIB] [Disp8*N] [Immediate]
|
|
830 |
// [Disp16,32]
|
|
831 |
//
|
|
832 |
// VEX: 0xC4 | P1 | P2
|
|
833 |
//
|
|
834 |
// 7 6 5 4 3 2 1 0
|
|
835 |
// P1 R X B m m m m m P[ 7:0]
|
|
836 |
// P2 W v v v v L p p P[15:8]
|
|
837 |
//
|
|
838 |
// VEX: 0xC5 | P1
|
|
839 |
//
|
|
840 |
// 7 6 5 4 3 2 1 0
|
|
841 |
// P1 R v v v v L p p P[7:0]
|
|
842 |
//
|
|
843 |
// Figure. Bit Field Layout of the VEX Prefix
|
|
844 |
//
|
|
845 |
// Table. VEX Prefix Bit Field Functional Grouping
|
|
846 |
//
|
|
847 |
// Notation Bit field Group Position Comment
|
|
848 |
// ---------- ------------------------- -------- -------------------
|
|
849 |
// VEX.RXB Next-8 register specifier P[7:5] Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
|
|
850 |
// VEX.R REX.R inverse P[7] Combine with EVEX.R and ModR/M.reg.
|
|
851 |
// VEX.X REX.X inverse P[6] Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent.
|
|
852 |
// VEX.B REX.B inverse P[5]
|
|
853 |
// VEX.mmmmm 0F, 0F_38, 0F_3A encoding P[4:0] b01/0x0F, b10/0F_38, b11/0F_3A (all other reserved)
|
|
854 |
//
|
|
855 |
// VEX.W Opcode specific P[15]
|
|
856 |
// VEX.vvvv A register specifier P[14:11] In inverse form, b1111 if not used.
|
|
857 |
// P[6:3]
|
|
858 |
// VEX.L Vector length/RC P[10] b0/scalar or 128b vec, b1/256b vec.
|
|
859 |
// P[2]
|
|
860 |
// VEX.pp Compressed legacy prefix P[9:8] b00/None, b01/0x66, b10/0xF3, b11/0xF2
|
|
861 |
// P[1:0]
|
|
862 |
// @formatter:on
|
|
863 |
|
51436
|
864 |
/**
|
|
865 |
* Low-level function to encode and emit the VEX prefix.
|
|
866 |
* <p>
|
|
867 |
* 2 byte form: [1100 0101] [R vvvv L pp]<br>
|
|
868 |
* 3 byte form: [1100 0100] [RXB m-mmmm] [W vvvv L pp]
|
|
869 |
* <p>
|
|
870 |
* The RXB and vvvv fields are stored in 1's complement in the prefix encoding. This function
|
|
871 |
* performs the 1s complement conversion, the caller is expected to pass plain unencoded
|
|
872 |
* arguments.
|
|
873 |
* <p>
|
|
874 |
* The pp field encodes an extension to the opcode:<br>
|
|
875 |
* 00: no extension<br>
|
|
876 |
* 01: 66<br>
|
|
877 |
* 10: F3<br>
|
|
878 |
* 11: F2
|
|
879 |
* <p>
|
|
880 |
* The m-mmmm field encodes the leading bytes of the opcode:<br>
|
|
881 |
* 00001: implied 0F leading opcode byte (default in 2-byte encoding)<br>
|
|
882 |
* 00010: implied 0F 38 leading opcode bytes<br>
|
|
883 |
* 00011: implied 0F 3A leading opcode bytes
|
|
884 |
* <p>
|
|
885 |
* This function automatically chooses the 2 or 3 byte encoding, based on the XBW flags and the
|
|
886 |
* m-mmmm field.
|
|
887 |
*/
|
52578
|
888 |
protected final void emitVEX(int l, int pp, int mmmmm, int w, int rxb, int vvvv, boolean checkAVX) {
|
|
889 |
assert !checkAVX || ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX) : "emitting VEX prefix on a CPU without AVX support";
|
51436
|
890 |
|
51736
|
891 |
assert l == L128 || l == L256 : "invalid value for VEX.L";
|
51436
|
892 |
assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for VEX.pp";
|
|
893 |
assert mmmmm == M_0F || mmmmm == M_0F38 || mmmmm == M_0F3A : "invalid value for VEX.m-mmmm";
|
51736
|
894 |
assert w == W0 || w == W1 : "invalid value for VEX.W";
|
51436
|
895 |
|
|
896 |
assert (rxb & 0x07) == rxb : "invalid value for VEX.RXB";
|
|
897 |
assert (vvvv & 0x0F) == vvvv : "invalid value for VEX.vvvv";
|
|
898 |
|
|
899 |
int rxb1s = rxb ^ 0x07;
|
|
900 |
int vvvv1s = vvvv ^ 0x0F;
|
|
901 |
if ((rxb & 0x03) == 0 && w == WIG && mmmmm == M_0F) {
|
|
902 |
// 2 byte encoding
|
|
903 |
int byte2 = 0;
|
|
904 |
byte2 |= (rxb1s & 0x04) << 5;
|
|
905 |
byte2 |= vvvv1s << 3;
|
|
906 |
byte2 |= l << 2;
|
|
907 |
byte2 |= pp;
|
|
908 |
|
52578
|
909 |
emitByte(Prefix.VEX2);
|
51436
|
910 |
emitByte(byte2);
|
|
911 |
} else {
|
|
912 |
// 3 byte encoding
|
|
913 |
int byte2 = 0;
|
|
914 |
byte2 = (rxb1s & 0x07) << 5;
|
|
915 |
byte2 |= mmmmm;
|
|
916 |
|
|
917 |
int byte3 = 0;
|
|
918 |
byte3 |= w << 7;
|
|
919 |
byte3 |= vvvv1s << 3;
|
|
920 |
byte3 |= l << 2;
|
|
921 |
byte3 |= pp;
|
|
922 |
|
52578
|
923 |
emitByte(Prefix.VEX3);
|
51436
|
924 |
emitByte(byte2);
|
|
925 |
emitByte(byte3);
|
|
926 |
}
|
|
927 |
}
|
|
928 |
|
51736
|
929 |
public static int getLFlag(AVXSize size) {
|
51436
|
930 |
switch (size) {
|
|
931 |
case XMM:
|
|
932 |
return L128;
|
|
933 |
case YMM:
|
|
934 |
return L256;
|
|
935 |
case ZMM:
|
|
936 |
return L512;
|
|
937 |
default:
|
51736
|
938 |
return LZ;
|
51436
|
939 |
}
|
|
940 |
}
|
|
941 |
|
58299
|
942 |
public static boolean isAVX512Register(Register reg) {
|
|
943 |
return reg != null && reg.isValid() && AMD64.XMM.equals(reg.getRegisterCategory()) && reg.encoding > 15;
|
51436
|
944 |
}
|
|
945 |
|
58299
|
946 |
public final boolean vexPrefix(Register dst, Register nds, Register src, AVXSize size, int pp, int mmmmm, int w, int wEvex, boolean checkAVX) {
|
59095
|
947 |
if (isAVX512Register(dst) || isAVX512Register(nds) || isAVX512Register(src) || size == AVXSize.ZMM) {
|
58299
|
948 |
evexPrefix(dst, Register.None, nds, src, size, pp, mmmmm, wEvex, Z0, B0);
|
|
949 |
return true;
|
|
950 |
}
|
52578
|
951 |
emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX);
|
58299
|
952 |
return false;
|
|
953 |
}
|
|
954 |
|
|
955 |
public final boolean vexPrefix(Register dst, Register nds, AMD64Address src, AVXSize size, int pp, int mmmmm, int w, int wEvex, boolean checkAVX) {
|
59095
|
956 |
if (isAVX512Register(dst) || isAVX512Register(nds) || size == AVXSize.ZMM) {
|
58299
|
957 |
evexPrefix(dst, Register.None, nds, src, size, pp, mmmmm, wEvex, Z0, B0);
|
|
958 |
return true;
|
|
959 |
}
|
|
960 |
emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX);
|
|
961 |
return false;
|
51436
|
962 |
}
|
|
963 |
|
|
964 |
protected static final class EVEXPrefixConfig {
|
|
965 |
public static final int Z0 = 0x0;
|
|
966 |
public static final int Z1 = 0x1;
|
|
967 |
|
|
968 |
public static final int B0 = 0x0;
|
|
969 |
public static final int B1 = 0x1;
|
|
970 |
|
|
971 |
private EVEXPrefixConfig() {
|
|
972 |
}
|
|
973 |
}
|
|
974 |
|
|
975 |
/**
 * Sentinel used in place of a disp8 scaling factor for vector lengths that an
 * {@link EVEXTuple} does not support.
 */
private static final int NOT_SUPPORTED_VECTOR_LENGTH = -1;
|
|
976 |
|
|
977 |
/**
|
|
978 |
* EVEX-encoded instructions use a compressed displacement scheme by multiplying disp8 with a
|
|
979 |
* scaling factor N depending on the tuple type and the vector length.
|
|
980 |
*
|
|
981 |
* Reference: Intel Software Developer's Manual Volume 2, Section 2.6.5
|
|
982 |
*/
|
|
983 |
protected enum EVEXTuple {
|
58299
|
984 |
INVALID(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH),
|
51436
|
985 |
FV_NO_BROADCAST_32BIT(16, 32, 64),
|
|
986 |
FV_BROADCAST_32BIT(4, 4, 4),
|
|
987 |
FV_NO_BROADCAST_64BIT(16, 32, 64),
|
|
988 |
FV_BROADCAST_64BIT(8, 8, 8),
|
|
989 |
HV_NO_BROADCAST_32BIT(8, 16, 32),
|
|
990 |
HV_BROADCAST_32BIT(4, 4, 4),
|
|
991 |
FVM(16, 32, 64),
|
|
992 |
T1S_8BIT(1, 1, 1),
|
|
993 |
T1S_16BIT(2, 2, 2),
|
|
994 |
T1S_32BIT(4, 4, 4),
|
|
995 |
T1S_64BIT(8, 8, 8),
|
|
996 |
T1F_32BIT(4, 4, 4),
|
|
997 |
T1F_64BIT(8, 8, 8),
|
|
998 |
T2_32BIT(8, 8, 8),
|
|
999 |
T2_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
|
|
1000 |
T4_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
|
|
1001 |
T4_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
|
|
1002 |
T8_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
|
|
1003 |
HVM(8, 16, 32),
|
|
1004 |
QVM(4, 8, 16),
|
|
1005 |
OVM(2, 4, 8),
|
|
1006 |
M128(16, 16, 16),
|
|
1007 |
DUP(8, 32, 64);
|
|
1008 |
|
|
1009 |
private final int scalingFactorVL128;
|
|
1010 |
private final int scalingFactorVL256;
|
|
1011 |
private final int scalingFactorVL512;
|
|
1012 |
|
|
1013 |
EVEXTuple(int scalingFactorVL128, int scalingFactorVL256, int scalingFactorVL512) {
|
|
1014 |
this.scalingFactorVL128 = scalingFactorVL128;
|
|
1015 |
this.scalingFactorVL256 = scalingFactorVL256;
|
|
1016 |
this.scalingFactorVL512 = scalingFactorVL512;
|
|
1017 |
}
|
|
1018 |
|
|
1019 |
private static int verifyScalingFactor(int scalingFactor) {
|
|
1020 |
if (scalingFactor == NOT_SUPPORTED_VECTOR_LENGTH) {
|
|
1021 |
throw GraalError.shouldNotReachHere("Invalid scaling factor.");
|
|
1022 |
}
|
|
1023 |
return scalingFactor;
|
|
1024 |
}
|
|
1025 |
|
|
1026 |
public int getDisp8ScalingFactor(AVXSize size) {
|
|
1027 |
switch (size) {
|
|
1028 |
case XMM:
|
|
1029 |
return verifyScalingFactor(scalingFactorVL128);
|
|
1030 |
case YMM:
|
|
1031 |
return verifyScalingFactor(scalingFactorVL256);
|
|
1032 |
case ZMM:
|
|
1033 |
return verifyScalingFactor(scalingFactorVL512);
|
|
1034 |
default:
|
|
1035 |
throw GraalError.shouldNotReachHere("Unsupported vector size.");
|
|
1036 |
}
|
|
1037 |
}
|
|
1038 |
}
|
|
1039 |
|
52578
|
1040 |
// @formatter:off
|
|
1041 |
//
|
|
1042 |
// Instruction Format and EVEX illustrated below (optional []):
|
|
1043 |
//
|
|
1044 |
// #of bytes: 4 1 1 1 1,2,4 1
|
|
1045 |
// [Prefixes] EVEX OpCode ModR/M [SIB] [Disp8*N] [Immediate]
|
|
1046 |
// [Disp16,32]
|
|
1047 |
//
|
|
1048 |
// The EVEX prefix is a 4-byte prefix, with the first two bytes derived from unused encoding
|
|
1049 |
// form of the 32-bit-mode-only BOUND instruction. The layout of the EVEX prefix is shown in
|
|
1050 |
// the figure below. The first byte must be 0x62, followed by three pay-load bytes, denoted
|
|
1051 |
// as P1, P2, and P3 individually or collectively as P[23:0] (see below).
|
|
1052 |
//
|
|
1053 |
// EVEX: 0x62 | P1 | P2 | P3
|
|
1054 |
//
|
|
1055 |
// 7 6 5 4 3 2 1 0
|
|
1056 |
// P1 R X B R' 0 0 m m P[ 7: 0]
|
|
1057 |
// P2 W v v v v 1 p p P[15: 8]
|
|
1058 |
// P3 z L' L b V' a a a P[23:16]
|
|
1059 |
//
|
|
1060 |
// Figure. Bit Field Layout of the EVEX Prefix
|
|
1061 |
//
|
|
1062 |
// Table. EVEX Prefix Bit Field Functional Grouping
|
|
1063 |
//
|
|
1064 |
// Notation Bit field Group Position Comment
|
|
1065 |
// --------- -------------------------- -------- -----------------------
|
|
1066 |
// EVEX.RXB Next-8 register specifier P[7:5] Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
|
|
1067 |
// EVEX.X High-16 register specifier P[6] Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent.
|
|
1068 |
// EVEX.R' High-16 register specifier P[4] Combine with EVEX.R and ModR/M.reg.
|
|
1069 |
// -- Reserved P[3:2] Must be 0.
|
|
1070 |
// EVEX.mm Compressed legacy escape P[1:0] Identical to low two bits of VEX.mmmmm.
|
|
1071 |
//
|
|
1072 |
// EVEX.W Osize promotion/Opcode ext P[15]
|
|
1073 |
// EVEX.vvvv NDS register specifier P[14:11] Same as VEX.vvvv.
|
|
1074 |
// -- Fixed Value P[10] Must be 1.
|
|
1075 |
// EVEX.pp Compressed legacy prefix P[9:8] Identical to VEX.pp.
|
|
1076 |
//
|
|
1077 |
// EVEX.z Zeroing/Merging P[23]
|
|
1078 |
// EVEX.L'L Vector length/RC P[22:21]
|
|
1079 |
// EVEX.b Broadcast/RC/SAE Context P[20]
|
|
1080 |
// EVEX.V' High-16 NDS/VIDX register P[19] Combine with EVEX.vvvv or VSIB when present.
|
|
1081 |
// EVEX.aaa Embedded opmask register P[18:16]
|
|
1082 |
//
|
|
1083 |
// @formatter:on
|
|
1084 |
|
51436
|
1085 |
/**
|
|
1086 |
* Low-level function to encode and emit the EVEX prefix.
|
|
1087 |
* <p>
|
|
1088 |
* 62 [0 1 1 0 0 0 1 0]<br>
|
|
1089 |
* P1 [R X B R'0 0 m m]<br>
|
|
1090 |
* P2 [W v v v v 1 p p]<br>
|
|
1091 |
* P3 [z L'L b V'a a a]
|
|
1092 |
* <p>
|
|
1093 |
* The pp field encodes an extension to the opcode:<br>
|
|
1094 |
* 00: no extension<br>
|
|
1095 |
* 01: 66<br>
|
|
1096 |
* 10: F3<br>
|
|
1097 |
* 11: F2
|
|
1098 |
* <p>
|
|
1099 |
* The mm field encodes the leading bytes of the opcode:<br>
|
|
1100 |
* 01: implied 0F leading opcode byte<br>
|
|
1101 |
* 10: implied 0F 38 leading opcode bytes<br>
|
|
1102 |
* 11: implied 0F 3A leading opcode bytes
|
|
1103 |
* <p>
|
|
1104 |
* The z field encodes the merging mode (merge or zero).
|
|
1105 |
* <p>
|
|
1106 |
* The b field encodes the source broadcast or data rounding modes.
|
|
1107 |
* <p>
|
|
1108 |
* The aaa field encodes the operand mask register.
|
|
1109 |
*/
|
|
1110 |
private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv, int z, int b, int aaa) {
|
|
1111 |
assert ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX512F) : "emitting EVEX prefix on a CPU without AVX512 support";
|
|
1112 |
|
51736
|
1113 |
assert l == L128 || l == L256 || l == L512 : "invalid value for EVEX.L'L";
|
51436
|
1114 |
assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for EVEX.pp";
|
|
1115 |
assert mm == M_0F || mm == M_0F38 || mm == M_0F3A : "invalid value for EVEX.mm";
|
51736
|
1116 |
assert w == W0 || w == W1 : "invalid value for EVEX.W";
|
51436
|
1117 |
|
|
1118 |
assert (rxb & 0x07) == rxb : "invalid value for EVEX.RXB";
|
|
1119 |
assert (reg & 0x1F) == reg : "invalid value for EVEX.R'";
|
52578
|
1120 |
assert (vvvvv & 0x1F) == vvvvv : "invalid value for EVEX.V'vvvv";
|
51436
|
1121 |
|
|
1122 |
assert z == Z0 || z == Z1 : "invalid value for EVEX.z";
|
|
1123 |
assert b == B0 || b == B1 : "invalid value for EVEX.b";
|
|
1124 |
assert (aaa & 0x07) == aaa : "invalid value for EVEX.aaa";
|
|
1125 |
|
52578
|
1126 |
emitByte(Prefix.EVEX);
|
51436
|
1127 |
int p1 = 0;
|
|
1128 |
p1 |= ((rxb ^ 0x07) & 0x07) << 5;
|
|
1129 |
p1 |= reg < 16 ? 0x10 : 0;
|
|
1130 |
p1 |= mm;
|
|
1131 |
emitByte(p1);
|
|
1132 |
|
|
1133 |
int p2 = 0;
|
|
1134 |
p2 |= w << 7;
|
|
1135 |
p2 |= ((vvvvv ^ 0x0F) & 0x0F) << 3;
|
52578
|
1136 |
p2 |= 0x04;
|
51436
|
1137 |
p2 |= pp;
|
|
1138 |
emitByte(p2);
|
|
1139 |
|
|
1140 |
int p3 = 0;
|
|
1141 |
p3 |= z << 7;
|
|
1142 |
p3 |= l << 5;
|
|
1143 |
p3 |= b << 4;
|
|
1144 |
p3 |= vvvvv < 16 ? 0x08 : 0;
|
|
1145 |
p3 |= aaa;
|
|
1146 |
emitByte(p3);
|
|
1147 |
}
|
|
1148 |
|
52578
|
1149 |
/**
|
|
1150 |
* Get RXB bits for register-register instructions in EVEX-encoding, where ModRM.rm contains a
|
|
1151 |
* register index. The R bit extends the ModRM.reg field and the X and B bits extends the
|
|
1152 |
* ModRM.rm field.
|
|
1153 |
*/
|
51436
|
1154 |
private static int getRXBForEVEX(Register reg, Register rm) {
|
|
1155 |
int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
|
|
1156 |
rxb |= (rm == null ? 0 : rm.encoding & 0x018) >> 3;
|
|
1157 |
return rxb;
|
|
1158 |
}
|
|
1159 |
|
|
1160 |
/**
|
|
1161 |
* Helper method for emitting EVEX prefix in the form of RRRR.
|
|
1162 |
*/
|
|
1163 |
protected final void evexPrefix(Register dst, Register mask, Register nds, Register src, AVXSize size, int pp, int mm, int w, int z, int b) {
|
52578
|
1164 |
assert !mask.isValid() || inRC(MASK, mask);
|
51436
|
1165 |
emitEVEX(getLFlag(size), pp, mm, w, getRXBForEVEX(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
|
|
1166 |
}
|
|
1167 |
|
|
1168 |
/**
|
|
1169 |
* Helper method for emitting EVEX prefix in the form of RRRM. Because the memory addressing in
|
|
1170 |
* EVEX-encoded instructions employ a compressed displacement scheme when using disp8 form, the
|
|
1171 |
* user of this API should make sure to encode the operands using
|
58299
|
1172 |
* {@link #emitOperandHelper(Register, AMD64Address, int, int)}.
|
51436
|
1173 |
*/
|
|
1174 |
protected final void evexPrefix(Register dst, Register mask, Register nds, AMD64Address src, AVXSize size, int pp, int mm, int w, int z, int b) {
|
52578
|
1175 |
assert !mask.isValid() || inRC(MASK, mask);
|
51436
|
1176 |
emitEVEX(getLFlag(size), pp, mm, w, getRXB(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
|
|
1177 |
}
|
|
1178 |
|
|
1179 |
}
|