8182583: AArch64: FMA Vectorization on aarch64
Reviewed-by: aph
Contributed-by: yang.zhang@linaro.org
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Fri Jun 30 10:36:32 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Fri Jun 23 09:25:27 2017 +0800
@@ -16777,6 +16777,48 @@
ins_pipe(vmla128);
%}
+// FmaVF, vector length 2 (vecD, T2S): dst = dst + src1 * src2, emitted as fmla; matched only when UseFMA is set.
+instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseFMA && n->as_Vector()->length() == 2);
+ match(Set dst (FmaVF dst (Binary src1 src2)));
+ format %{ "fmla $dst,$src1,$src2\t# vector (2S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fmla(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vmuldiv_fp64);
+%}
+
+// FmaVF, vector length 4 (vecX, T4S): dst = dst + src1 * src2, emitted as fmla; matched only when UseFMA is set.
+instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseFMA && n->as_Vector()->length() == 4);
+ match(Set dst (FmaVF dst (Binary src1 src2)));
+ format %{ "fmla $dst,$src1,$src2\t# vector (4S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fmla(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vmuldiv_fp128);
+%}
+
+// FmaVD, vector length 2 (vecX, T2D): dst = dst + src1 * src2, emitted as fmla; matched only when UseFMA is set.
+instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseFMA && n->as_Vector()->length() == 2);
+ match(Set dst (FmaVD dst (Binary src1 src2)));
+ format %{ "fmla $dst,$src1,$src2\t# vector (2D)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fmla(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vmuldiv_fp128);
+%}
+
// --------------------------------- MLS --------------------------------------
instruct vmls4S(vecD dst, vecD src1, vecD src2)
@@ -16836,6 +16878,51 @@
ins_pipe(vmla128);
%}
+// FmaVF, vector length 2 (vecD, T2S), with either multiplicand wrapped in NegVF: dst = dst - src1 * src2, emitted as fmls; matched only when UseFMA is set.
+instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseFMA && n->as_Vector()->length() == 2);
+ match(Set dst (FmaVF dst (Binary (NegVF src1) src2)));
+ match(Set dst (FmaVF dst (Binary src1 (NegVF src2))));
+ format %{ "fmls $dst,$src1,$src2\t# vector (2S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fmls(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vmuldiv_fp64);
+%}
+
+// FmaVF, vector length 4 (vecX, T4S), with either multiplicand wrapped in NegVF: dst = dst - src1 * src2, emitted as fmls; matched only when UseFMA is set.
+instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseFMA && n->as_Vector()->length() == 4);
+ match(Set dst (FmaVF dst (Binary (NegVF src1) src2)));
+ match(Set dst (FmaVF dst (Binary src1 (NegVF src2))));
+ format %{ "fmls $dst,$src1,$src2\t# vector (4S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fmls(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vmuldiv_fp128);
+%}
+
+// FmaVD, vector length 2 (vecX, T2D), with either multiplicand wrapped in NegVD: dst = dst - src1 * src2, emitted as fmls; matched only when UseFMA is set.
+instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseFMA && n->as_Vector()->length() == 2);
+ match(Set dst (FmaVD dst (Binary (NegVD src1) src2)));
+ match(Set dst (FmaVD dst (Binary src1 (NegVD src2))));
+ format %{ "fmls $dst,$src1,$src2\t# vector (2D)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fmls(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vmuldiv_fp128);
+%}
+
// --------------------------------- DIV --------------------------------------
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Fri Jun 30 10:36:32 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Fri Jun 23 09:25:27 2017 +0800
@@ -2201,6 +2201,8 @@
INSN(fdiv, 1, 0, 0b111111);
INSN(fmul, 1, 0, 0b110111);
INSN(fsub, 0, 1, 0b110101);
+ INSN(fmla, 0, 0, 0b110011); // emitted by the vmla2F/vmla4F/vmla2D rules in aarch64.ad (dst + src1 * src2)
+ INSN(fmls, 0, 1, 0b110011); // emitted by the vmls2F/vmls4F/vmls2D rules in aarch64.ad (dst - src1 * src2); differs from fmla only in the third INSN argument
#undef INSN