# HG changeset patch # User pli # Date 1551926346 0 # Node ID 1dbe0c210134b4e087196df51effaaa1b9fe8def # Parent f984aca565c16b8f4aff7ff5eded80b3b9efc8c1 8214922: Add vectorization support for fmin/fmax Reviewed-by: adinn, roland diff -r f984aca565c1 -r 1dbe0c210134 src/hotspot/cpu/aarch64/aarch64.ad --- a/src/hotspot/cpu/aarch64/aarch64.ad Mon Mar 11 21:26:19 2019 -0400 +++ b/src/hotspot/cpu/aarch64/aarch64.ad Thu Mar 07 02:39:06 2019 +0000 @@ -15660,6 +15660,98 @@ ins_pipe(pipe_class_default); %} +instruct reduce_max2F(vRegF dst, vRegF src1, vecD src2, vecD tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "fmaxs $dst, $src1, $src2\n\t" + "ins $tmp, S, $src2, 0, 1\n\t" + "fmaxs $dst, $dst, $tmp\t max reduction2F" %} + ins_encode %{ + __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, as_FloatRegister($src2$$reg), 0, 1); + __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_max4F(vRegF dst, vRegF src1, vecX src2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "fmaxv $dst, T4S, $src2\n\t" + "fmaxs $dst, $dst, $src1\t max reduction4F" %} + ins_encode %{ + __ fmaxv(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src2$$reg)); + __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_max2D(vRegD dst, vRegD src1, vecX src2, vecX tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "fmaxd $dst, $src1, $src2\n\t" + "ins $tmp, D, $src2, 0, 1\n\t" + "fmaxd $dst, $dst, $tmp\t max reduction2D" %} + ins_encode %{ + __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($src2$$reg), 0, 1); + __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_min2F(vRegF dst, vRegF src1, vecD src2, vecD tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MinReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "fmins $dst, $src1, $src2\n\t" + "ins $tmp, S, $src2, 0, 1\n\t" + "fmins $dst, $dst, $tmp\t min reduction2F" %} + ins_encode %{ + __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, as_FloatRegister($src2$$reg), 0, 1); + __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_min4F(vRegF dst, vRegF src1, vecX src2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MinReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "fminv $dst, T4S, $src2\n\t" + "fmins $dst, $dst, $src1\t min reduction4F" %} + ins_encode %{ + __ fminv(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src2$$reg)); + __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_min2D(vRegD dst, vRegD src1, vecX src2, vecX tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MinReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "fmind $dst, $src1, $src2\n\t" + "ins $tmp, D, $src2, 0, 1\n\t" + "fmind $dst, $dst, $tmp\t min reduction2D" %} + ins_encode %{ + __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($src2$$reg), 0, 1); + __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + // ====================VECTOR ARITHMETIC======================================= // --------------------------------- ADD -------------------------------------- @@ -17198,6 +17290,90 @@ ins_pipe(vshift128_imm); %} +instruct vmax2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + format %{ "fmax $dst,$src1,$src2\t# vector (2F)" %} + ins_encode %{ + __ fmax(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp64); +%} + +instruct vmax4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + format %{ "fmax $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fmax(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +instruct vmax2D(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + format %{ "fmax $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fmax(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +instruct vmin2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + format %{ "fmin $dst,$src1,$src2\t# vector (2F)" %} + ins_encode %{ + __ fmin(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp64); +%} + +instruct vmin4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + format %{ "fmin $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fmin(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +instruct vmin2D(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + format %{ "fmin $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fmin(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + //----------PEEPHOLE RULES----------------------------------------------------- // These must follow all instruction definitions as they use the names // defined in the instructions definitions. diff -r f984aca565c1 -r 1dbe0c210134 src/hotspot/cpu/aarch64/assembler_aarch64.hpp --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp Mon Mar 11 21:26:19 2019 -0400 +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp Thu Mar 07 02:39:06 2019 +0000 @@ -2240,6 +2240,19 @@ #undef INSN +#define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ + starti; \ + assert(T == T4S, "arrangement must be T4S"); \ + f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24), f(opc, 23), \ + f(T == T4S ? 0 : 1, 22), f(0b110000111110, 21, 10); rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(fmaxv, 0); + INSN(fminv, 1); + +#undef INSN + #define INSN(NAME, op0, cmode0) \ void NAME(FloatRegister Vd, SIMD_Arrangement T, unsigned imm8, unsigned lsl = 0) { \ unsigned cmode = cmode0; \ @@ -2281,6 +2294,8 @@ INSN(fsub, 0, 1, 0b110101); INSN(fmla, 0, 0, 0b110011); INSN(fmls, 0, 1, 0b110011); + INSN(fmax, 0, 0, 0b111101); + INSN(fmin, 0, 1, 0b111101); #undef INSN diff -r f984aca565c1 -r 1dbe0c210134 src/hotspot/share/adlc/formssel.cpp --- a/src/hotspot/share/adlc/formssel.cpp Mon Mar 11 21:26:19 2019 -0400 +++ b/src/hotspot/share/adlc/formssel.cpp Thu Mar 07 02:39:06 2019 +0000 @@ -3802,6 +3802,7 @@ "AndI","AndL", "AndV", "MaxI","MinI","MaxF","MinF","MaxD","MinD", + "MaxV", "MinV", "MulI","MulL","MulF","MulD", "MulVS","MulVI","MulVL","MulVF","MulVD", "OrI","OrL", @@ -4177,6 +4178,7 @@ "NegVF","NegVD", "SqrtVD","SqrtVF", "AndV" ,"XorV" ,"OrV", + "MaxV", "MinV", "AddReductionVI", "AddReductionVL", "AddReductionVF", "AddReductionVD", "MulReductionVI", "MulReductionVL", @@ -4186,6 +4188,7 @@ "LShiftVB","LShiftVS","LShiftVI","LShiftVL", "RShiftVB","RShiftVS","RShiftVI","RShiftVL", "URShiftVB","URShiftVS","URShiftVI","URShiftVL", + "MaxReductionV", "MinReductionV", "ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD", "LoadVector","StoreVector", "FmaVD", "FmaVF","PopCountVI", diff -r f984aca565c1 -r 1dbe0c210134 src/hotspot/share/opto/classes.hpp --- a/src/hotspot/share/opto/classes.hpp Mon Mar 11 21:26:19 2019 -0400 +++ b/src/hotspot/share/opto/classes.hpp Thu Mar 07 02:39:06 2019 +0000 @@ -374,6 +374,10 @@ macro(AndV) macro(OrV) macro(XorV) +macro(MinV) +macro(MaxV) +macro(MinReductionV) +macro(MaxReductionV) macro(LoadVector) macro(StoreVector) macro(Pack) diff -r f984aca565c1 -r 1dbe0c210134 src/hotspot/share/opto/compile.cpp --- a/src/hotspot/share/opto/compile.cpp Mon Mar 11 21:26:19 2019 -0400 +++ b/src/hotspot/share/opto/compile.cpp Thu Mar 07 02:39:06 2019 +0000 @@ -3349,6 +3349,8 @@ case Op_MulReductionVL: case Op_MulReductionVF: case Op_MulReductionVD: + case Op_MinReductionV: + case Op_MaxReductionV: break; case Op_PackB: diff -r f984aca565c1 -r 1dbe0c210134 src/hotspot/share/opto/vectornode.cpp --- a/src/hotspot/share/opto/vectornode.cpp Mon Mar 11 21:26:19 2019 -0400 +++ b/src/hotspot/share/opto/vectornode.cpp Thu Mar 07 02:39:06 2019 +0000 @@ -178,6 +178,18 @@ case Op_XorI: case Op_XorL: return Op_XorV; + case Op_MinF: + assert(bt == T_FLOAT, "must be"); + return Op_MinV; + case Op_MinD: + assert(bt == T_DOUBLE, "must be"); + return Op_MinV; + case Op_MaxF: + assert(bt == T_FLOAT, "must be"); + return Op_MaxV; + case Op_MaxD: + assert(bt == T_DOUBLE, "must be"); + return Op_MaxV; case Op_LoadB: case Op_LoadUB: @@ -377,6 +389,9 @@ case Op_OrV: return new OrVNode (n1, n2, vt); case Op_XorV: return new XorVNode(n1, n2, vt); + case Op_MinV: return new MinVNode(n1, n2, vt); + case Op_MaxV: return new MaxVNode(n1, n2, vt); + case Op_MulAddVS2VI: return new MulAddVS2VINode(n1, n2, vt); default: fatal("Missed vector creation for '%s'", NodeClassNames[vopc]); @@ -582,6 +597,22 @@ assert(bt == T_DOUBLE, "must be"); vopc = Op_MulReductionVD; break; + case Op_MinF: + assert(bt == T_FLOAT, "must be"); + vopc = Op_MinReductionV; + break; + case Op_MinD: + assert(bt == T_DOUBLE, "must be"); + vopc = Op_MinReductionV; + break; + case Op_MaxF: + assert(bt == T_FLOAT, "must be"); + vopc = Op_MaxReductionV; + break; + case Op_MaxD: + assert(bt == T_DOUBLE, "must be"); + vopc = Op_MaxReductionV; + break; // TODO: add MulL for targets that support it default: break; @@ -606,6 +637,8 @@ case Op_MulReductionVL: return new MulReductionVLNode(ctrl, n1, n2); case Op_MulReductionVF: return new MulReductionVFNode(ctrl, n1, n2); case Op_MulReductionVD: return new MulReductionVDNode(ctrl, n1, n2); + case Op_MinReductionV: return new MinReductionVNode(ctrl, n1, n2); + case Op_MaxReductionV: return new MaxReductionVNode(ctrl, n1, n2); default: fatal("Missed vector creation for '%s'", NodeClassNames[vopc]); return NULL; diff -r f984aca565c1 -r 1dbe0c210134 src/hotspot/share/opto/vectornode.hpp --- a/src/hotspot/share/opto/vectornode.hpp Mon Mar 11 21:26:19 2019 -0400 +++ b/src/hotspot/share/opto/vectornode.hpp Thu Mar 07 02:39:06 2019 +0000 @@ -555,6 +555,78 @@ virtual int Opcode() const; }; +//------------------------------MinVNode-------------------------------------- +// Vector min +class MinVNode : public VectorNode { +public: + MinVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {} + virtual int Opcode() const; +}; + +//------------------------------MaxVNode-------------------------------------- +// Vector max +class MaxVNode : public VectorNode { +public: + MaxVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {} + virtual int Opcode() const; +}; + +//------------------------------MinReductionVNode-------------------------------------- +// Vector min as a reduction +class MinReductionVNode : public ReductionNode { +public: + MinReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} + virtual int Opcode() const; + virtual const Type* bottom_type() const { + BasicType bt = in(1)->bottom_type()->basic_type(); + if (bt == T_FLOAT) { + return Type::FLOAT; + } else if (bt == T_DOUBLE) { + return Type::DOUBLE; + } + assert(false, "unsupported basic type"); + return NULL; + } + virtual uint ideal_reg() const { + BasicType bt = in(1)->bottom_type()->basic_type(); + if (bt == T_FLOAT) { + return Op_RegF; + } else if (bt == T_DOUBLE) { + return Op_RegD; + } + assert(false, "unsupported basic type"); + return 0; + } +}; + +//------------------------------MaxReductionVNode-------------------------------------- +// Vector max as a reduction +class MaxReductionVNode : public ReductionNode { +public: + MaxReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} + virtual int Opcode() const; + virtual const Type* bottom_type() const { + BasicType bt = in(1)->bottom_type()->basic_type(); + if (bt == T_FLOAT) { + return Type::FLOAT; + } else { + return Type::DOUBLE; + } + assert(false, "unsupported basic type"); + return NULL; + } + virtual uint ideal_reg() const { + BasicType bt = in(1)->bottom_type()->basic_type(); + if (bt == T_FLOAT) { + return Op_RegF; + } else { + return Op_RegD; + } + assert(false, "unsupported basic type"); + return 0; + } +}; + //================================= M E M O R Y =============================== //------------------------------LoadVectorNode--------------------------------- diff -r f984aca565c1 -r 1dbe0c210134 src/hotspot/share/runtime/vmStructs.cpp --- a/src/hotspot/share/runtime/vmStructs.cpp Mon Mar 11 21:26:19 2019 -0400 +++ b/src/hotspot/share/runtime/vmStructs.cpp Thu Mar 07 02:39:06 2019 +0000 @@ -1808,6 +1808,10 @@ declare_c2_type(AndVNode, VectorNode) \ declare_c2_type(OrVNode, VectorNode) \ declare_c2_type(XorVNode, VectorNode) \ + declare_c2_type(MaxVNode, VectorNode) \ + declare_c2_type(MinVNode, VectorNode) \ + declare_c2_type(MaxReductionVNode, ReductionNode) \ + declare_c2_type(MinReductionVNode, ReductionNode) \ declare_c2_type(LoadVectorNode, LoadNode) \ declare_c2_type(StoreVectorNode, StoreNode) \ declare_c2_type(ReplicateBNode, VectorNode) \