8215262: PPC64: FMA Vectorization on PPC64
authormhorie
Wed, 12 Dec 2018 12:36:53 -0500
changeset 53011 1c85328b7631
parent 53010 086dfcfc3731
child 53012 c1eed9867bf0
8215262: PPC64: FMA Vectorization on PPC64 Reviewed-by: mdoerr, gromero
src/hotspot/cpu/ppc/assembler_ppc.hpp
src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
src/hotspot/cpu/ppc/ppc.ad
--- a/src/hotspot/cpu/ppc/assembler_ppc.hpp	Thu Dec 13 08:36:10 2018 +0100
+++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp	Wed Dec 12 12:36:53 2018 -0500
@@ -539,6 +539,12 @@
     XVSUBDP_OPCODE = (60u << OPCODE_SHIFT |  104u << 3),
     XVMULSP_OPCODE = (60u << OPCODE_SHIFT |   80u << 3),
     XVMULDP_OPCODE = (60u << OPCODE_SHIFT |  112u << 3),
+    XVMADDASP_OPCODE=(60u << OPCODE_SHIFT |   65u << 3),
+    XVMADDADP_OPCODE=(60u << OPCODE_SHIFT |   97u << 3),
+    XVMSUBASP_OPCODE=(60u << OPCODE_SHIFT |   81u << 3),
+    XVMSUBADP_OPCODE=(60u << OPCODE_SHIFT |  113u << 3),
+    XVNMSUBASP_OPCODE=(60u<< OPCODE_SHIFT |  209u << 3),
+    XVNMSUBADP_OPCODE=(60u<< OPCODE_SHIFT |  241u << 3),
 
     // Deliver A Random Number (introduced with POWER9)
     DARN_OPCODE    = (31u << OPCODE_SHIFT |  755u << 1),
@@ -2227,6 +2233,12 @@
   inline void xvsubdp(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
   inline void xvmulsp(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
   inline void xvmuldp(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xvmaddasp(VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xvmaddadp(VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xvmsubasp(VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xvmsubadp(VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xvnmsubasp(VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xvnmsubadp(VectorSRegister d, VectorSRegister a, VectorSRegister b);
 
   // VSX Extended Mnemonics
   inline void xxspltd(  VectorSRegister d, VectorSRegister a, int x);
--- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp	Thu Dec 13 08:36:10 2018 +0100
+++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp	Wed Dec 12 12:36:53 2018 -0500
@@ -790,6 +790,12 @@
 inline void Assembler::xvsubdp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVSUBDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
 inline void Assembler::xvmulsp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMULSP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
 inline void Assembler::xvmuldp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMULDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xvmaddasp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMADDASP_OPCODE  | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xvmaddadp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMADDADP_OPCODE  | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xvmsubasp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMSUBASP_OPCODE  | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xvmsubadp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMSUBADP_OPCODE  | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xvnmsubasp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVNMSUBASP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xvnmsubadp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVNMSUBADP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
 inline void Assembler::mtvrd(   VectorRegister d, Register a)               { emit_int32( MTVSRD_OPCODE  | vsrt(d->to_vsr()) | ra(a)); }
 inline void Assembler::mfvrd(   Register        a, VectorRegister d)         { emit_int32( MFVSRD_OPCODE  | vsrt(d->to_vsr()) | ra(a)); }
 inline void Assembler::mtvrwz(  VectorRegister  d, Register a)               { emit_int32( MTVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
--- a/src/hotspot/cpu/ppc/ppc.ad	Thu Dec 13 08:36:10 2018 +0100
+++ b/src/hotspot/cpu/ppc/ppc.ad	Wed Dec 12 12:36:53 2018 -0500
@@ -2257,6 +2257,9 @@
     return SuperwordUseVSX;
   case Op_PopCountVI:
     return (SuperwordUseVSX && UsePopCountInstruction);
+  case Op_FmaVF:
+  case Op_FmaVD:
+    return (SuperwordUseVSX && UseFMA);
   case Op_Digit:
   case Op_LowerCase:
   case Op_UpperCase:
@@ -14475,6 +14478,92 @@
   ins_pipe(pipe_class_default);
 %}
 
+// --------------------------------- FMA --------------------------------------
+// dst + src1 * src2
+instruct vfma4F(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (FmaVF dst (Binary src1 src2)));
+  predicate(n->as_Vector()->length() == 4);
+
+  format %{ "XVMADDASP   $dst, $src1, $src2" %}
+
+  size(4);
+  ins_encode %{
+    __ xvmaddasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// dst - src1 * src2
+instruct vfma4F_neg1(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (FmaVF dst (Binary (NegVF src1) src2)));
+  match(Set dst (FmaVF dst (Binary src1 (NegVF src2))));
+  predicate(n->as_Vector()->length() == 4);
+
+  format %{ "XVNMSUBASP   $dst, $src1, $src2" %}
+
+  size(4);
+  ins_encode %{
+    __ xvnmsubasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// - dst + src1 * src2
+instruct vfma4F_neg2(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (FmaVF (NegVF dst) (Binary src1 src2)));
+  predicate(n->as_Vector()->length() == 4);
+
+  format %{ "XVMSUBASP   $dst, $src1, $src2" %}
+
+  size(4);
+  ins_encode %{
+    __ xvmsubasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// dst + src1 * src2
+instruct vfma2D(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (FmaVD  dst (Binary src1 src2)));
+  predicate(n->as_Vector()->length() == 2);
+
+  format %{ "XVMADDADP   $dst, $src1, $src2" %}
+
+  size(4);
+  ins_encode %{
+    __ xvmaddadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// dst - src1 * src2
+instruct vfma2D_neg1(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
+  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
+  predicate(n->as_Vector()->length() == 2);
+
+  format %{ "XVNMSUBADP   $dst, $src1, $src2" %}
+
+  size(4);
+  ins_encode %{
+    __ xvnmsubadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// - dst + src1 * src2
+instruct vfma2D_neg2(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (FmaVD (NegVD dst) (Binary src1 src2)));
+  predicate(n->as_Vector()->length() == 2);
+
+  format %{ "XVMSUBADP   $dst, $src1, $src2" %}
+
+  size(4);
+  ins_encode %{
+    __ xvmsubadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
 
 //----------Overflow Math Instructions-----------------------------------------