8129551: aarch64: some regressions introduced by addition of vectorisation code
author enevill
Tue, 23 Jun 2015 18:56:17 +0000
changeset 31227 964d24a82077
parent 31216 43d0179ee9de
child 31239 6afe1cebecf7
8129551: aarch64: some regressions introduced by addition of vectorisation code
Summary: Fix regressions
Reviewed-by: kvn
hotspot/src/cpu/aarch64/vm/aarch64.ad
hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Wed Jul 05 20:38:50 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Tue Jun 23 18:56:17 2015 +0000
@@ -13276,7 +13276,7 @@
   ins_cost(INSN_COST);
   format %{ "movi  $dst, $con\t# vector(16B)" %}
   ins_encode %{
-    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant);
+    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
   %}
   ins_pipe(pipe_class_default);
 %}
@@ -13298,7 +13298,7 @@
   ins_cost(INSN_COST);
   format %{ "movi  $dst, $con\t# vector(8H)" %}
   ins_encode %{
-    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant);
+    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
   %}
   ins_pipe(pipe_class_default);
 %}
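The two aarch64.ad changes above mask the matched constant down to the element width (0xff for T16B, 0xffff for T8H) before handing it to MacroAssembler::mov. A minimal standalone sketch (not HotSpot code, and assuming the matcher can deliver the constant sign-extended) of why the mask matters now that mov() asserts there are no extraneous bits:

// Sketch only: shows how a replicated byte constant such as -1 would reach
// the encoder with bits outside the 8-bit element unless it is masked,
// which would trip the new "extraneous bits" assert in MacroAssembler::mov.
#include <cstdint>
#include <cstdio>

int main() {
  int64_t con = -1;                                     // hypothetical matched constant
  uint32_t raw    = static_cast<uint32_t>(con);         // 0xffffffff - extraneous bits set
  uint32_t masked = static_cast<uint32_t>(con) & 0xff;  // 0x000000ff - legal T16B immediate
  printf("raw=0x%08x masked=0x%08x\n", raw, masked);
  return 0;
}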
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp	Wed Jul 05 20:38:50 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp	Tue Jun 23 18:56:17 2015 +0000
@@ -491,6 +491,11 @@
         i->rf(_index, 16);
         i->f(_ext.option(), 15, 13);
         unsigned size = i->get(31, 30);
+        if (i->get(26, 26) && i->get(23, 23)) {
+          // SIMD Q Type - Size = 128 bits
+          assert(size == 0, "bad size");
+          size = 0b100;
+        }
         if (size == 0) // It's a byte
           i->f(_ext.shift() >= 0, 12);
         else {
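This assembler hunk recognises a full-width SIMD access (the SIMD bit at position 26 together with bit 23 set) and widens the size field to 0b100, so the scaled-index shift for a Q-register load/store becomes 4 rather than the byte case. A minimal standalone sketch (not HotSpot code) of the same bit test:

// Sketch only: extract the transfer size implied by a load/store word,
// treating the SIMD Q form (bit 26 and bit 23 set, size bits 31:30 == 0b00)
// as a 128-bit access, i.e. a scaled-index shift of 4.
#include <cstdint>
#include <cstdio>
#include <cassert>

static unsigned bits(uint32_t insn, int hi, int lo) {
  return (insn >> lo) & ((1u << (hi - lo + 1)) - 1);
}

static unsigned transfer_size_log2(uint32_t insn) {
  unsigned size = bits(insn, 31, 30);
  if (bits(insn, 26, 26) && bits(insn, 23, 23)) {
    assert(size == 0 && "bad size");   // Q form reuses size == 0b00
    size = 0b100;                      // 128-bit SIMD access
  }
  return size;
}

int main() {
  // Hypothetical word with only the bits this check reads set:
  // size = 0b00 (bits 31:30), SIMD bit 26 = 1, bit 23 = 1.
  uint32_t insn = (1u << 26) | (1u << 23);
  printf("shift for scaled Q-register index = %u\n", transfer_size_log2(insn));
  return 0;
}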
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Jul 05 20:38:50 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Jun 23 18:56:17 2015 +0000
@@ -1408,6 +1408,52 @@
   movk(r, imm64 & 0xffff, 32);
 }
 
+// Macro to mov replicated immediate to vector register.
+//  Vd will get the following values for different arrangements in T
+//   imm32 == hex 000000gh  T8B:  Vd = ghghghghghghghgh
+//   imm32 == hex 000000gh  T16B: Vd = ghghghghghghghghghghghghghghghgh
+//   imm32 == hex 0000efgh  T4H:  Vd = efghefghefghefgh
+//   imm32 == hex 0000efgh  T8H:  Vd = efghefghefghefghefghefghefghefgh
+//   imm32 == hex abcdefgh  T2S:  Vd = abcdefghabcdefgh
+//   imm32 == hex abcdefgh  T4S:  Vd = abcdefghabcdefghabcdefghabcdefgh
+//   T1D/T2D: invalid
+void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) {
+  assert(T != T1D && T != T2D, "invalid arrangement");
+  if (T == T8B || T == T16B) {
+    assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
+    movi(Vd, T, imm32 & 0xff, 0);
+    return;
+  }
+  u_int32_t nimm32 = ~imm32;
+  if (T == T4H || T == T8H) {
+    assert((imm32  & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
+    imm32 &= 0xffff;
+    nimm32 &= 0xffff;
+  }
+  u_int32_t x = imm32;
+  int movi_cnt = 0;
+  int movn_cnt = 0;
+  while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
+  x = nimm32;
+  while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
+  if (movn_cnt < movi_cnt) imm32 = nimm32;
+  unsigned lsl = 0;
+  while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
+  if (movn_cnt < movi_cnt)
+    mvni(Vd, T, imm32 & 0xff, lsl);
+  else
+    movi(Vd, T, imm32 & 0xff, lsl);
+  imm32 >>= 8; lsl += 8;
+  while (imm32) {
+    while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
+    if (movn_cnt < movi_cnt)
+      bici(Vd, T, imm32 & 0xff, lsl);
+    else
+      orri(Vd, T, imm32 & 0xff, lsl);
+    lsl += 8; imm32 >>= 8;
+  }
+}
+
 void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64)
 {
 #ifndef PRODUCT
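The mov() body moved here from the header chooses between a movi/orri sequence built from imm32 and an mvni/bici sequence built from ~imm32, whichever touches fewer non-zero bytes. A minimal standalone model (not HotSpot code) of that selection, using a hypothetical T4S constant 0xffffff00:

// Sketch only: count the non-zero bytes of imm32 and of its complement,
// then report which instruction sequence MacroAssembler::mov would emit.
#include <cstdint>
#include <cstdio>

static int nonzero_bytes(uint32_t x) {
  int n = 0;
  while (x) { if (x & 0xff) n++; x >>= 8; }
  return n;
}

int main() {
  uint32_t imm32  = 0xffffff00;            // hypothetical replicated T4S constant
  uint32_t nimm32 = ~imm32;                // 0x000000ff
  int movi_cnt = nonzero_bytes(imm32);     // 3 bytes via movi/orri
  int movn_cnt = nonzero_bytes(nimm32);    // 1 byte suffices via mvni
  if (movn_cnt < movi_cnt)
    printf("emit mvni #0x%x (then bici for any remaining non-zero byte)\n", nimm32 & 0xff);
  else
    printf("emit movi, then orri for each remaining non-zero byte of 0x%x\n", imm32);
  return 0;
}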
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Jul 05 20:38:50 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Jun 23 18:56:17 2015 +0000
@@ -465,44 +465,7 @@
 
   void movptr(Register r, uintptr_t imm64);
 
-  // Macro to mov replicated immediate to vector register.
-  // Where imm32 == hex abcdefgh, Vd will get the following values
-  // for different arrangements in T
-  //   T8B:  Vd = ghghghghghghghgh
-  //   T16B: Vd = ghghghghghghghghghghghghghghghgh
-  //   T4H:  Vd = efghefghefghefgh
-  //   T8H:  Vd = efghefghefghefghefghefghefghefgh
-  //   T2S:  Vd = abcdefghabcdefgh
-  //   T4S:  Vd = abcdefghabcdefghabcdefghabcdefgh
-  //   T1D/T2D: invalid
-  void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) {
-    assert(T != T1D && T != T2D, "invalid arrangement");
-    u_int32_t nimm32 = ~imm32;
-    if (T == T8B || T == T16B) { imm32 &= 0xff; nimm32 &= 0xff; }
-    if (T == T4H || T == T8H) { imm32 &= 0xffff; nimm32 &= 0xffff; }
-    u_int32_t x = imm32;
-    int movi_cnt = 0;
-    int movn_cnt = 0;
-    while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
-    x = nimm32;
-    while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
-    if (movn_cnt < movi_cnt) imm32 = nimm32;
-    unsigned lsl = 0;
-    while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
-    if (movn_cnt < movi_cnt)
-      mvni(Vd, T, imm32 & 0xff, lsl);
-    else
-      movi(Vd, T, imm32 & 0xff, lsl);
-    imm32 >>= 8; lsl += 8;
-    while (imm32) {
-      while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
-      if (movn_cnt < movi_cnt)
-        bici(Vd, T, imm32 & 0xff, lsl);
-      else
-        orri(Vd, T, imm32 & 0xff, lsl);
-      lsl += 8; imm32 >>= 8;
-    }
-  }
+  void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32);
 
   // macro instructions for accessing and updating floating point
   // status register