8129551: aarch64: some regressions introduced by addition of vectorisation code
Summary: Fix replicated vector immediate handling and 128-bit SIMD load/store address encoding
Reviewed-by: kvn
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Wed Jul 05 20:38:50 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Tue Jun 23 18:56:17 2015 +0000
@@ -13276,7 +13276,7 @@
ins_cost(INSN_COST);
format %{ "movi $dst, $con\t# vector(16B)" %}
ins_encode %{
- __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant);
+ __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
%}
ins_pipe(pipe_class_default);
%}
@@ -13298,7 +13298,7 @@
ins_cost(INSN_COST);
format %{ "movi $dst, $con\t# vector(8H)" %}
ins_encode %{
- __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant);
+ __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
%}
ins_pipe(pipe_class_default);
%}
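
Both ad-file hunks mask the matched constant before handing it to MacroAssembler::mov, presumably because the constant is carried in a wider, possibly sign-extended type: a negative replicated element such as -1 arrives with its high bits set and would trip the extraneous-bits asserts added to the out-of-line mov below. A minimal standalone sketch (plain C++, not HotSpot code) of what the masking recovers:

#include <cassert>
#include <cstdint>

int main() {
  int64_t con = -2;               // replicated element value, sign-extended
  uint32_t lane8  = con & 0xff;   // 0xfe:   within range for T8B/T16B
  uint32_t lane16 = con & 0xffff; // 0xfffe: within range for T4H/T8H
  assert((lane8  & ~0xffu)   == 0);
  assert((lane16 & ~0xffffu) == 0);
  return 0;
}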
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Wed Jul 05 20:38:50 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Tue Jun 23 18:56:17 2015 +0000
@@ -491,6 +491,11 @@
i->rf(_index, 16);
i->f(_ext.option(), 15, 13);
unsigned size = i->get(31, 30);
+ if (i->get(26, 26) && i->get(23, 23)) {
+ // SIMD Q-form: the size field reads 00 but the access is 128 bits
+ assert(size == 0, "bad size");
+ size = 0b100;
+ }
if (size == 0) // It's a byte
i->f(_ext.shift() >= 0, 12);
else {
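
The new guard covers Q-form SIMD loads and stores: bits 31:30 ("size") normally give log2 of the transfer size in bytes, but a 128-bit SIMD&FP access encodes size == 00 and instead signals its width through V (bit 26) and opc<1> (bit 23), so a scaled offset must be checked against a shift of log2(16) = 4. A standalone sketch of that decoding, under illustrative names (bits and ldst_log2_size are not HotSpot API):

#include <cassert>
#include <cstdint>

// Extract bits [hi:lo] of an instruction word.
static unsigned bits(uint32_t insn, int hi, int lo) {
  return (insn >> lo) & ((1u << (hi - lo + 1)) - 1);
}

// log2 of the transfer size in bytes for a load/store encoding.
unsigned ldst_log2_size(uint32_t insn) {
  unsigned size = bits(insn, 31, 30);
  if (bits(insn, 26, 26) && bits(insn, 23, 23)) {
    // SIMD&FP Q form: size is encoded as 00 but the access is 128 bits.
    assert(size == 0 && "bad size");
    size = 0b100; // log2(16)
  }
  return size;
}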
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed Jul 05 20:38:50 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Jun 23 18:56:17 2015 +0000
@@ -1408,6 +1408,52 @@
movk(r, imm64 & 0xffff, 32);
}

+// Macro to mov replicated immediate to vector register.
+// Vd will get the following values for different arrangements in T
+// imm32 == hex 000000gh T8B: Vd = ghghghghghghghgh
+// imm32 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh
+// imm32 == hex 0000efgh T4H: Vd = efghefghefghefgh
+// imm32 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh
+// imm32 == hex abcdefgh T2S: Vd = abcdefghabcdefgh
+// imm32 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh
+// T1D/T2D: invalid
+void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) {
+ assert(T != T1D && T != T2D, "invalid arrangement");
+ if (T == T8B || T == T16B) {
+ assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
+ movi(Vd, T, imm32 & 0xff, 0);
+ return;
+ }
+ u_int32_t nimm32 = ~imm32;
+ if (T == T4H || T == T8H) {
+ assert((imm32 & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
+ imm32 &= 0xffff;
+ nimm32 &= 0xffff;
+ }
+ u_int32_t x = imm32;
+ int movi_cnt = 0;
+ int movn_cnt = 0;
+ while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
+ x = nimm32;
+ while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
+ if (movn_cnt < movi_cnt) imm32 = nimm32;
+ unsigned lsl = 0;
+ while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
+ if (movn_cnt < movi_cnt)
+ mvni(Vd, T, imm32 & 0xff, lsl);
+ else
+ movi(Vd, T, imm32 & 0xff, lsl);
+ imm32 >>= 8; lsl += 8;
+ while (imm32) {
+ while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
+ if (movn_cnt < movi_cnt)
+ bici(Vd, T, imm32 & 0xff, lsl);
+ else
+ orri(Vd, T, imm32 & 0xff, lsl);
+ lsl += 8; imm32 >>= 8;
+ }
+}
+
void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64)
{
#ifndef PRODUCT
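
The relocated mov(FloatRegister, ...) keeps the original selection strategy: count the nonzero bytes of imm32 and of its complement, materialise whichever form needs fewer instructions, then patch the remaining bytes with orri (after movi) or bici (after mvni). A standalone sketch of just that heuristic, with an illustrative helper name (nonzero_bytes is not HotSpot API):

#include <cstdint>
#include <cstdio>

static int nonzero_bytes(uint32_t x) {
  int n = 0;
  while (x) { if (x & 0xff) n++; x >>= 8; }
  return n;
}

int main() {
  uint32_t imm32 = 0xffffff00;
  // Fewer nonzero bytes in ~imm32 means the mvni/bici path is shorter.
  bool inverted = nonzero_bytes(~imm32) < nonzero_bytes(imm32);
  printf("%s path: %d vs %d nonzero bytes\n",
         inverted ? "mvni/bici" : "movi/orri",
         nonzero_bytes(~imm32), nonzero_bytes(imm32));
  return 0;
}

For example, with T4S and imm32 == 0x00ff00ff the routine emits movi(Vd, T4S, 0xff, 0) followed by orri(Vd, T4S, 0xff, 16); with imm32 == 0xffffff00 the complement has a single nonzero byte, so a lone mvni(Vd, T4S, 0xff, 0) suffices.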
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed Jul 05 20:38:50 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Tue Jun 23 18:56:17 2015 +0000
@@ -465,44 +465,7 @@
void movptr(Register r, uintptr_t imm64);

- // Macro to mov replicated immediate to vector register.
- // Where imm32 == hex abcdefgh, Vd will get the following values
- // for different arrangements in T
- // T8B: Vd = ghghghghghghghgh
- // T16B: Vd = ghghghghghghghghghghghghghghghgh
- // T4H: Vd = efghefghefghefgh
- // T8H: Vd = efghefghefghefghefghefghefghefgh
- // T2S: Vd = abcdefghabcdefgh
- // T4S: Vd = abcdefghabcdefghabcdefghabcdefgh
- // T1D/T2D: invalid
- void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) {
- assert(T != T1D && T != T2D, "invalid arrangement");
- u_int32_t nimm32 = ~imm32;
- if (T == T8B || T == T16B) { imm32 &= 0xff; nimm32 &= 0xff; }
- if (T == T4H || T == T8H) { imm32 &= 0xffff; nimm32 &= 0xffff; }
- u_int32_t x = imm32;
- int movi_cnt = 0;
- int movn_cnt = 0;
- while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
- x = nimm32;
- while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
- if (movn_cnt < movi_cnt) imm32 = nimm32;
- unsigned lsl = 0;
- while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
- if (movn_cnt < movi_cnt)
- mvni(Vd, T, imm32 & 0xff, lsl);
- else
- movi(Vd, T, imm32 & 0xff, lsl);
- imm32 >>= 8; lsl += 8;
- while (imm32) {
- while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
- if (movn_cnt < movi_cnt)
- bici(Vd, T, imm32 & 0xff, lsl);
- else
- orri(Vd, T, imm32 & 0xff, lsl);
- lsl += 8; imm32 >>= 8;
- }
- }
+ void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32);

// macro instructions for accessing and updating floating point
// status register
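
The header now only declares the macro; the replication table travels with the definition. As a spot-check of that table, a hypothetical snippet computing the T8H pattern for a 16-bit immediate:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t h = 0x1234; // the 16-bit immediate ("efgh" in the comment)
  uint64_t half = h | h << 16 | h << 32 | h << 48;
  // T8H: both 64-bit halves of Vd carry the same replicated pattern.
  printf("Vd = %016" PRIx64 "%016" PRIx64 "\n", half, half);
  return 0;
}

This prints Vd = 12341234123412341234123412341234, matching the T8H row of the comment.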