23 |
23 |
24 |
24 |
25 |
25 |
26 package org.graalvm.compiler.asm.aarch64; |
26 package org.graalvm.compiler.asm.aarch64; |
27 |
27 |
|
28 import static jdk.vm.ci.aarch64.AArch64.CPU; |
|
29 import static jdk.vm.ci.aarch64.AArch64.rscratch1; |
|
30 import static jdk.vm.ci.aarch64.AArch64.rscratch2; |
|
31 import static jdk.vm.ci.aarch64.AArch64.sp; |
|
32 import static jdk.vm.ci.aarch64.AArch64.zr; |
28 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.BASE_REGISTER_ONLY; |
33 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.BASE_REGISTER_ONLY; |
29 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.EXTENDED_REGISTER_OFFSET; |
34 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.EXTENDED_REGISTER_OFFSET; |
30 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_SCALED; |
35 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_SCALED; |
31 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_UNSCALED; |
36 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_UNSCALED; |
32 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.REGISTER_OFFSET; |
37 import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.REGISTER_OFFSET; |
33 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_BASE; |
38 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_BASE; |
34 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_INDEX; |
39 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_INDEX; |
35 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.NO_WORK; |
40 import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.AddressGenerationPlan.WorkPlan.NO_WORK; |
36 |
41 |
37 import org.graalvm.compiler.asm.BranchTargetOutOfBoundsException; |
42 import org.graalvm.compiler.asm.BranchTargetOutOfBoundsException; |
38 |
|
39 import static jdk.vm.ci.aarch64.AArch64.CPU; |
|
40 import static jdk.vm.ci.aarch64.AArch64.r8; |
|
41 import static jdk.vm.ci.aarch64.AArch64.r9; |
|
42 import static jdk.vm.ci.aarch64.AArch64.sp; |
|
43 import static jdk.vm.ci.aarch64.AArch64.zr; |
|
44 |
|
45 import org.graalvm.compiler.asm.Label; |
43 import org.graalvm.compiler.asm.Label; |
46 import org.graalvm.compiler.core.common.NumUtil; |
44 import org.graalvm.compiler.core.common.NumUtil; |
47 import org.graalvm.compiler.debug.GraalError; |
45 import org.graalvm.compiler.debug.GraalError; |
48 |
46 |
49 import jdk.vm.ci.aarch64.AArch64; |
47 import jdk.vm.ci.aarch64.AArch64; |
50 import jdk.vm.ci.code.Register; |
48 import jdk.vm.ci.code.Register; |
51 import jdk.vm.ci.code.TargetDescription; |
49 import jdk.vm.ci.code.TargetDescription; |
52 |
50 |
53 public class AArch64MacroAssembler extends AArch64Assembler { |
51 public class AArch64MacroAssembler extends AArch64Assembler { |
54 |
52 |
55 private final ScratchRegister[] scratchRegister = new ScratchRegister[]{new ScratchRegister(r8), new ScratchRegister(r9)}; |
53 private final ScratchRegister[] scratchRegister = new ScratchRegister[]{new ScratchRegister(rscratch1), new ScratchRegister(rscratch2)}; |
56 |
54 |
57 // Points to the next free scratch register |
55 // Points to the next free scratch register |
58 private int nextFreeScratchRegister = 0; |
56 private int nextFreeScratchRegister = 0; |
59 |
57 |
60 public AArch64MacroAssembler(TargetDescription target) { |
58 public AArch64MacroAssembler(TargetDescription target) { |
337 |
335 |
338 /** |
336 /** |
339 * Generates a 64-bit immediate move code sequence. |
337 * Generates a 64-bit immediate move code sequence. |
340 * |
338 * |
341 * @param dst general purpose register. May not be null, stackpointer or zero-register. |
339 * @param dst general purpose register. May not be null, stackpointer or zero-register. |
342 * @param imm |
340 * @param imm the value to move into the register |
343 */ |
341 * @param annotateImm Flag denoting if annotation should be added. |
344 private void mov64(Register dst, long imm) { |
342 */ |
|
343 private void mov64(Register dst, long imm, boolean annotateImm) { |
345 // We have to move all non zero parts of the immediate in 16-bit chunks |
344 // We have to move all non zero parts of the immediate in 16-bit chunks |
|
345 int numMovs = 0; |
|
346 int pos = position(); |
346 boolean firstMove = true; |
347 boolean firstMove = true; |
347 for (int offset = 0; offset < 64; offset += 16) { |
348 for (int offset = 0; offset < 64; offset += 16) { |
348 int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16); |
349 int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16); |
349 if (chunk == 0) { |
350 if (chunk == 0) { |
350 continue; |
351 continue; |
376 * @param imm immediate loaded into register. |
381 * @param imm immediate loaded into register. |
377 * @param annotateImm Flag to signal if the immediate value should be annotated. |
382 * @param annotateImm Flag to signal if the immediate value should be annotated. |
378 */ |
383 */ |
379 public void mov(Register dst, long imm, boolean annotateImm) { |
384 public void mov(Register dst, long imm, boolean annotateImm) { |
380 assert dst.getRegisterCategory().equals(CPU); |
385 assert dst.getRegisterCategory().equals(CPU); |
381 int pos = position(); |
|
382 if (imm == 0L) { |
386 if (imm == 0L) { |
383 movx(dst, zr); |
387 movx(dst, zr); |
384 } else if (LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) { |
388 } else if (LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) { |
385 or(64, dst, zr, imm); |
389 or(64, dst, zr, imm); |
386 } else if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) { |
390 } else if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) { |
1305 super.fdiv(size, dst, n, d); |
1306 super.fdiv(size, dst, n, d); |
1306 super.frintz(size, dst, dst); |
1307 super.frintz(size, dst, dst); |
1307 super.fmsub(size, dst, dst, d, n); |
1308 super.fmsub(size, dst, dst, d, n); |
1308 } |
1309 } |
1309 |
1310 |
|
1311 /** |

1312 * Floating-point multiply-add: dst = src1 * src2 + src3. Forwards all arguments unchanged to {@code super.fmadd}. |

1313 * |

1314 * @param size register size (NOTE(review): presumably in bits - confirm against the base assembler). |

1315 * @param dst floating point register. May not be null. |

1316 * @param src1 floating point register. May not be null. |

1317 * @param src2 floating point register. May not be null. |

1318 * @param src3 floating point register. May not be null. |

1319 */ |

1320 @Override |

1321 public void fmadd(int size, Register dst, Register src1, Register src2, Register src3) { |

1322 super.fmadd(size, dst, src1, src2, src3); |

1323 } |
|
1324 |
1310 /* Branches */ |
1325 /* Branches */ |
1311 |
1326 |
1312 /** |
1327 /** |
1313 * Compares x and y and sets condition flags. |
1328 * Compares x and y and sets condition flags. |
1314 * |
1329 * |
1362 ScratchRegister sc2 = getScratchRegister()) { |
1377 ScratchRegister sc2 = getScratchRegister()) { |
1363 switch (size) { |
1378 switch (size) { |
1364 case 64: { |
1379 case 64: { |
1365 // Be careful with registers: it's possible that x, y, and dst are the same |
1380 // Be careful with registers: it's possible that x, y, and dst are the same |
1366 // register. |
1381 // register. |
1367 Register rscratch1 = sc1.getRegister(); |
1382 Register temp1 = sc1.getRegister(); |
1368 Register rscratch2 = sc2.getRegister(); |
1383 Register temp2 = sc2.getRegister(); |
1369 mul(64, rscratch1, x, y); // Result bits 0..63 |
1384 mul(64, temp1, x, y); // Result bits 0..63 |
1370 smulh(64, rscratch2, x, y); // Result bits 64..127 |
1385 smulh(64, temp2, x, y); // Result bits 64..127 |
1371 // Top is pure sign ext |
1386 // Top is pure sign ext |
1372 subs(64, zr, rscratch2, rscratch1, ShiftType.ASR, 63); |
1387 subs(64, zr, temp2, temp1, ShiftType.ASR, 63); |
1373 // Copy all 64 bits of the result into dst |
1388 // Copy all 64 bits of the result into dst |
1374 mov(64, dst, rscratch1); |
1389 mov(64, dst, temp1); |
1375 mov(rscratch1, 0x80000000); |
1390 mov(temp1, 0x80000000); |
1376 // Develop 0 (EQ), or 0x80000000 (NE) |
1391 // Develop 0 (EQ), or 0x80000000 (NE) |
1377 cmov(32, rscratch1, rscratch1, zr, ConditionFlag.NE); |
1392 cmov(32, temp1, temp1, zr, ConditionFlag.NE); |
1378 cmp(32, rscratch1, 1); |
1393 cmp(32, temp1, 1); |
1379 // 0x80000000 - 1 => VS |
1394 // 0x80000000 - 1 => VS |
1380 break; |
1395 break; |
1381 } |
1396 } |
1382 case 32: { |
1397 case 32: { |
1383 Register rscratch1 = sc1.getRegister(); |
1398 Register temp1 = sc1.getRegister(); |
1384 smaddl(rscratch1, x, y, zr); |
1399 smaddl(temp1, x, y, zr); |
1385 // Copy the low 32 bits of the result into dst |
1400 // Copy the low 32 bits of the result into dst |
1386 mov(32, dst, rscratch1); |
1401 mov(32, dst, temp1); |
1387 subs(64, zr, rscratch1, rscratch1, ExtendType.SXTW, 0); |
1402 subs(64, zr, temp1, temp1, ExtendType.SXTW, 0); |
1388 // NE => overflow |
1403 // NE => overflow |
1389 mov(rscratch1, 0x80000000); |
1404 mov(temp1, 0x80000000); |
1390 // Develop 0 (EQ), or 0x80000000 (NE) |
1405 // Develop 0 (EQ), or 0x80000000 (NE) |
1391 cmov(32, rscratch1, rscratch1, zr, ConditionFlag.NE); |
1406 cmov(32, temp1, temp1, zr, ConditionFlag.NE); |
1392 cmp(32, rscratch1, 1); |
1407 cmp(32, temp1, 1); |
1393 // 0x80000000 - 1 => VS |
1408 // 0x80000000 - 1 => VS |
1394 break; |
1409 break; |
1395 } |
1410 } |
1396 } |
1411 } |
1397 } |
1412 } |
1701 int sizeEncoding = information & 1; |
1716 int sizeEncoding = information & 1; |
1702 int regEncoding = information >>> 1; |
1717 int regEncoding = information >>> 1; |
1703 Register reg = AArch64.cpuRegisters.get(regEncoding); |
1718 Register reg = AArch64.cpuRegisters.get(regEncoding); |
1704 // 1 => 64; 0 => 32 |
1719 // 1 => 64; 0 => 32 |
1705 int size = sizeEncoding * 32 + 32; |
1720 int size = sizeEncoding * 32 + 32; |
|
1721 if (!NumUtil.isSignedNbit(21, branchOffset)) { |
|
1722 throw new BranchTargetOutOfBoundsException(true, "Branch target %d out of bounds", branchOffset); |
|
1723 } |
1706 switch (type) { |
1724 switch (type) { |
1707 case BRANCH_NONZERO: |
1725 case BRANCH_NONZERO: |
1708 super.cbnz(size, reg, branchOffset, branch); |
1726 super.cbnz(size, reg, branchOffset, branch); |
1709 break; |
1727 break; |
1710 case BRANCH_ZERO: |
1728 case BRANCH_ZERO: |
// Patch hook for macro-instruction code annotations; implemented in this file by the adrp/ldr annotation class. |
// NOTE(review): the meaning of codePos, relative and code is not visible in this excerpt - presumably the |
// position of the sequence, the relative displacement to encode, and the code buffer to modify; confirm. |
1803 public interface MacroInstruction { |
1821 public interface MacroInstruction { |
1804 void patch(int codePos, int relative, byte[] code); |
1822 void patch(int codePos, int relative, byte[] code); |
1805 } |
1823 } |
1806 |
1824 |
1807 /** |
1825 /** |
1808 * Emits elf patchable adrp add sequence. |
1826 * Emits elf patchable adrp ldr sequence. |
* |
* If a code patching annotation consumer is set, it first receives a macro-instruction |
* annotation created at the current position, before either instruction is emitted. Then adrp |
* is emitted on the base register of {@code a}, followed by an ldr called with |
* {@code srcSize}, {@code result} and {@code a}. |
* |
* @param srcSize size passed to the ldr (NOTE(review): presumably in bits - confirm). |
* @param result register passed to the ldr as its register operand. |
* @param a address supplying the adrp base register and the ldr address. |
1809 */ |
1827 */ |
1810 public void adrAddRel(int srcSize, Register result, AArch64Address a) { |
1828 public void adrpLdr(int srcSize, Register result, AArch64Address a) { |
1811 if (codePatchingAnnotationConsumer != null) { |
1829 if (codePatchingAnnotationConsumer != null) { |
1812 codePatchingAnnotationConsumer.accept(new ADRADDPRELMacroInstruction(position())); |
1830 codePatchingAnnotationConsumer.accept(new AdrpLdrMacroInstruction(position())); |
1813 } |
1831 } |
1814 super.adrp(a.getBase()); |
1832 super.adrp(a.getBase()); |
1815 this.ldr(srcSize, result, a); |
1833 this.ldr(srcSize, result, a); |
1816 } |
1834 } |
1817 |
1835 |
1818 public static class ADRADDPRELMacroInstruction extends CodeAnnotation implements MacroInstruction { |
1836 public static class AdrpLdrMacroInstruction extends CodeAnnotation implements MacroInstruction { |
1819 public ADRADDPRELMacroInstruction(int position) { |
1837 public AdrpLdrMacroInstruction(int position) { |
1820 super(position); |
1838 super(position); |
1821 } |
1839 } |
1822 |
1840 |
1823 @Override |
1841 @Override |
1824 public String toString() { |
1842 public String toString() { |
1825 return "ADR_PREL_PG"; |
1843 return "ADRP_LDR"; |
1826 } |
1844 } |
1827 |
1845 |
1828 @Override |
1846 @Override |
1829 public void patch(int codePos, int relative, byte[] code) { |
1847 public void patch(int codePos, int relative, byte[] code) { |
1830 throw GraalError.unimplemented(); |
1848 throw GraalError.unimplemented(); |