src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64Assembler.java
changeset 54601 c40b2a190173
parent 54328 37648a9c4a6a
child 55509 d58442b8abc1
child 58678 9cf78a70fa4f
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64Assembler.java	Tue Apr 23 14:09:54 2019 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64Assembler.java	Tue Apr 23 22:55:09 2019 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2018, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -25,9 +25,15 @@
 
 package org.graalvm.compiler.asm.aarch64;
 
+import static jdk.vm.ci.aarch64.AArch64.CPU;
+import static jdk.vm.ci.aarch64.AArch64.SIMD;
 import static jdk.vm.ci.aarch64.AArch64.cpuRegisters;
+import static jdk.vm.ci.aarch64.AArch64.r0;
+import static jdk.vm.ci.aarch64.AArch64.sp;
+import static jdk.vm.ci.aarch64.AArch64.zr;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADD;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADDS;
+import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADDV;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADR;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.ADRP;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.AND;
@@ -44,6 +50,7 @@
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLREX;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLS;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CLZ;
+import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CNT;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSEL;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSINC;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.CSNEG;
@@ -107,27 +114,24 @@
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SUB;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SUBS;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.SWP;
+import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.TBNZ;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.TBZ;
-import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.TBNZ;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.UBFM;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.UDIV;
+import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.Instruction.UMOV;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.FP32;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.FP64;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.General32;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.General64;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.floatFromSize;
 import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.generalFromSize;
-import static jdk.vm.ci.aarch64.AArch64.CPU;
-import static jdk.vm.ci.aarch64.AArch64.SIMD;
-import static jdk.vm.ci.aarch64.AArch64.r0;
-import static jdk.vm.ci.aarch64.AArch64.sp;
-import static jdk.vm.ci.aarch64.AArch64.zr;
+import static org.graalvm.compiler.asm.aarch64.AArch64Assembler.InstructionType.simdFromSize;
 
 import java.util.Arrays;
 
 import org.graalvm.compiler.asm.Assembler;
+import org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode;
 import org.graalvm.compiler.core.common.NumUtil;
-import org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode;
 import org.graalvm.compiler.debug.GraalError;
 
 import jdk.vm.ci.aarch64.AArch64;
@@ -371,15 +375,41 @@
     }
 
     /**
+     * Enumeration of all different lane types of a SIMD register. Each constant carries its
+     * {@code encoding} value and its lane width in bits ({@code nbits}):
+     * Byte(B):8b/lane; HalfWord(H):16b/lane; Word(S):32b/lane; DoubleWord(D):64b/lane.
+     */
+    public enum SIMDElementSize {
+        Byte(0, 8),
+        HalfWord(1, 16),
+        Word(2, 32),
+        DoubleWord(3, 64);
+
+        public final int encoding;
+        public final int nbits;
+
+        SIMDElementSize(int encoding, int nbits) {
+            this.encoding = encoding;
+            this.nbits = nbits;
+        }
+    }
+
+    /**
      * Enumeration of all different instruction kinds: General32/64 are the general instructions
      * (integer, branch, etc.), for 32-, respectively 64-bit operands. FP32/64 is the encoding for
-     * the 32/64bit float operations
+     * the 32/64bit float operations. SIMDByte/HalfWord/Word/DoubleWord are the encodings for SIMD
+     * instructions with 8/16/32/64-bit element widths.
      */
     protected enum InstructionType {
         General32(0b00 << 30, 32, true),
         General64(0b10 << 30, 64, true),
         FP32(0x00000000, 32, false),
-        FP64(0x00400000, 64, false);
+        FP64(0x00400000, 64, false),
+
+        SIMDByte(0x01, 8, false),
+        SIMDHalfWord(0x02, 16, false),
+        SIMDWord(0x04, 32, false),
+        SIMDDoubleWord(0x08, 64, false);
 
         public final int encoding;
         public final int width;
@@ -401,6 +431,20 @@
             return size == 32 ? FP32 : FP64;
         }
 
+        /** Maps an element width in bits (8, 16, 32 or 64) to its SIMD instruction type. */
+        public static InstructionType simdFromSize(int size) {
+            if (size == 8) {
+                return SIMDByte;
+            } else if (size == 16) {
+                return SIMDHalfWord;
+            } else if (size == 32) {
+                return SIMDWord;
+            } else if (size == 64) {
+                return SIMDDoubleWord;
+            }
+            // Any other width has no SIMD instruction type.
+            throw GraalError.shouldNotReachHere();
+        }
     }
 
     private static final int ImmediateOffset = 10;
@@ -493,6 +537,10 @@
     private static final int LDADDAcquireOffset = 23;
     private static final int LDADDReleaseOffset = 22;
 
+    private static final int SIMDImm5Offset = 16; // shift applied to the imm5 value in umov
+    private static final int SIMDQBitOffset = 30; // shift applied to the Q bit in cnt/addv/umov
+    private static final int SIMDSizeOffset = 22; // shift applied to the lane-size encoding in addv
+
     /**
      * Encoding for all instructions.
      */
@@ -611,7 +659,7 @@
         FCSEL(0x1E200C00),
 
         INS(0x4e081c00),
-        UMOV(0x4e083c00),
+        UMOV(0x0e003c00),
 
         CNT(0xe205800),
         USRA(0x6f001400),
@@ -626,7 +674,9 @@
         MRS(0xD5300000),
         MSR(0xD5100000),
 
-        BLR_NATIVE(0xc0000000);
+        BLR_NATIVE(0xc0000000),
+
+        ADDV(0x0e31b800);
 
         public final int encoding;
 
@@ -2957,4 +3007,45 @@
         }
     }
 
+    /**
+     * Counts the set bits in each byte of src: dst[i] = bitCount(src[i]) for every byte lane i.
+     * @param size register size. Has to be 64 or 128.
+     * @param dst SIMD register. Should not be null.
+     * @param src SIMD register. Should not be null.
+     */
+    public void cnt(int size, Register dst, Register src) {
+        assert size == 64 || size == 128 : "Invalid size for cnt";
+        int qBit = (size >> 7) << SIMDQBitOffset;
+        emitInt(qBit | CNT.encoding | rd(dst) | rs1(src));
+    }
+
+    /**
+     * Sums all lanes of the source register: dst = src[0] + ... + src[n].
+     * @param size register size. Has to be 64 or 128.
+     * @param laneWidth the lane width the SIMD register is split into. Must not be DoubleWord.
+     * @param dst SIMD register. Should not be null.
+     * @param src SIMD register. Should not be null.
+     */
+    public void addv(int size, SIMDElementSize laneWidth, Register dst, Register src) {
+        assert size == 64 || size == 128 : "Invalid size for addv";
+        assert laneWidth != SIMDElementSize.DoubleWord : "Invalid lane width for addv";
+        assert size != 64 || laneWidth != SIMDElementSize.Word : "Invalid size and lane combination for addv";
+        int qAndSizeFields = (size >> 7) << SIMDQBitOffset | laneWidth.encoding << SIMDSizeOffset;
+        emitInt(qAndSizeFields | ADDV.encoding | rd(dst) | rs1(src));
+    }
+
+    /**
+     * dst = src[srcIdx]: copies one lane of a SIMD register into a general purpose register.
+     * @param size lane size in bits. Can be 8, 16, 32 or 64.
+     * @param dst general purpose register. Should not be null or zero-register.
+     * @param srcIdx non-negative lane index of source register that dest data is from.
+     * @param src SIMD register. Should not be null.
+     */
+    public void umov(int size, Register dst, int srcIdx, Register src) {
+        assert srcIdx >= 0 && (srcIdx + 1) * size <= 128 : "Invalid src vectRegister index";
+        int sizeBit = simdFromSize(size).encoding;
+        // imm5 holds the lane index in the bits above the single set bit that marks the lane size.
+        int imm5 = sizeBit | srcIdx << (Integer.numberOfTrailingZeros(sizeBit) + 1);
+        emitInt((size >> 6) << SIMDQBitOffset | imm5 << SIMDImm5Offset | UMOV.encoding | rd(dst) | rs1(src));
+    }
 }