8182583: AArch64: FMA Vectorization on aarch64
author njian
Fri, 23 Jun 2017 09:25:27 +0800
changeset 46606 211fbfdbc30b
parent 46605 358794f12717
child 46607 cbe334268a4c
8182583: AArch64: FMA Vectorization on aarch64 Reviewed-by: aph Contributed-by: yang.zhang@linaro.org
hotspot/src/cpu/aarch64/vm/aarch64.ad
hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Fri Jun 30 10:36:32 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Fri Jun 23 09:25:27 2017 +0800
@@ -16777,6 +16777,48 @@
   ins_pipe(vmla128);
 %}
 
+// Vector FMA, 2 float lanes (vecD): dst = dst + src1 * src2. Selected for FmaVF only when UseFMA is set; emits fmla with the T2S arrangement.
+instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseFMA && n->as_Vector()->length() == 2);
+  match(Set dst (FmaVF  dst (Binary src1 src2)));
+  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp64);
+%}
+
+// Vector FMA, 4 float lanes (vecX): dst = dst + src1 * src2. Selected for FmaVF only when UseFMA is set; emits fmla with the T4S arrangement.
+instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseFMA && n->as_Vector()->length() == 4);
+  match(Set dst (FmaVF  dst (Binary src1 src2)));
+  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
+// Vector FMA, 2 double lanes (vecX): dst = dst + src1 * src2. Selected for FmaVD only when UseFMA is set; emits fmla with the T2D arrangement.
+instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseFMA && n->as_Vector()->length() == 2);
+  match(Set dst (FmaVD  dst (Binary src1 src2)));
+  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
 // --------------------------------- MLS --------------------------------------
 
 instruct vmls4S(vecD dst, vecD src1, vecD src2)
@@ -16836,6 +16878,51 @@
   ins_pipe(vmla128);
 %}
 
+// Vector fused multiply-subtract, 2 float lanes (vecD): dst = dst - src1 * src2. Matches FmaVF with NegVF applied to either multiplicand; requires UseFMA; emits fmls with T2S.
+instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseFMA && n->as_Vector()->length() == 2);
+  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
+  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
+  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp64);
+%}
+
+// Vector fused multiply-subtract, 4 float lanes (vecX): dst = dst - src1 * src2. Matches FmaVF with NegVF applied to either multiplicand; requires UseFMA; emits fmls with T4S.
+instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseFMA && n->as_Vector()->length() == 4);
+  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
+  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
+  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
+// Vector fused multiply-subtract, 2 double lanes (vecX): dst = dst - src1 * src2. Matches FmaVD with NegVD applied to either multiplicand; requires UseFMA; emits fmls with T2D.
+instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseFMA && n->as_Vector()->length() == 2);
+  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
+  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
+  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
 // --------------------------------- DIV --------------------------------------
 
 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp	Fri Jun 30 10:36:32 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp	Fri Jun 23 09:25:27 2017 +0800
@@ -2201,6 +2201,8 @@
   INSN(fdiv, 1, 0, 0b111111);
   INSN(fmul, 1, 0, 0b110111);
   INSN(fsub, 0, 1, 0b110101);
+  INSN(fmla, 0, 0, 0b110011);  // NOTE(review): INSN macro body is not visible in this hunk; fmla and fmls pass the same last argument (0b110011)
+  INSN(fmls, 0, 1, 0b110011);  // differs from fmla only in the second macro argument (1 instead of 0) - confirm field meaning against the macro definition
 
 #undef INSN