# HG changeset patch # User vdeshpande # Date 1496866186 25200 # Node ID cf0da758e7b5da88ec755f6739494207e4b5e089 # Parent df19f7e4b9f79a614044786781b80f49fcff5aa0 8181616: FMA Vectorization on x86 Reviewed-by: kvn diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/cpu/x86/vm/assembler_x86.cpp --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed Jun 07 13:09:46 2017 -0700 @@ -5092,6 +5092,42 @@ emit_operand(dst, src); } +void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) { + assert(VM_Version::supports_fma(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0xB8); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) { + assert(VM_Version::supports_fma(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0xB8); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) { + assert(VM_Version::supports_fma(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned 
char)0xB8); + emit_operand(dst, src2); +} + +void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) { + assert(VM_Version::supports_fma(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0xB8); + emit_operand(dst, src2); +} + void Assembler::divpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/cpu/x86/vm/assembler_x86.hpp --- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Wed Jun 07 13:09:46 2017 -0700 @@ -1906,6 +1906,11 @@ void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void vfmadd231pd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vfmadd231ps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vfmadd231pd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void vfmadd231ps(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + // Divide Packed Floating-Point Values void divpd(XMMRegister dst, XMMRegister src); void divps(XMMRegister dst, XMMRegister src); diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Wed Jun 07 13:09:46 2017 -0700 @@ 
-3165,8 +3165,37 @@ } } - - +// dst = c = a * b + c +void MacroAssembler::vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len) { + Assembler::vfmadd231pd(c, a, b, vector_len); + if (dst != c) { + vmovdqu(dst, c); + } +} + +// dst = c = a * b + c +void MacroAssembler::vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len) { + Assembler::vfmadd231ps(c, a, b, vector_len); + if (dst != c) { + vmovdqu(dst, c); + } +} + +// dst = c = a * b + c +void MacroAssembler::vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len) { + Assembler::vfmadd231pd(c, a, b, vector_len); + if (dst != c) { + vmovdqu(dst, c); + } +} + +// dst = c = a * b + c +void MacroAssembler::vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len) { + Assembler::vfmadd231ps(c, a, b, vector_len); + if (dst != c) { + vmovdqu(dst, c); + } +} void MacroAssembler::incrementl(AddressLiteral dst) { if (reachable(dst)) { diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Wed Jun 07 13:09:46 2017 -0700 @@ -456,6 +456,11 @@ void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); + void vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); + void vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); + void vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); + void vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); + // same as fcmp2int, but using SSE2 void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/cpu/x86/vm/vm_version_x86.cpp --- 
a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Wed Jun 07 13:09:46 2017 -0700 @@ -812,7 +812,7 @@ FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); } - if (supports_fma() && UseSSE >= 2) { + if (supports_fma()) { if (FLAG_IS_DEFAULT(UseFMA)) { UseFMA = true; } diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/cpu/x86/vm/vm_version_x86.hpp --- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Wed Jun 07 13:09:46 2017 -0700 @@ -732,7 +732,7 @@ static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); } static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); } static bool supports_sha() { return (_features & CPU_SHA) != 0; } - static bool supports_fma() { return (_features & CPU_FMA) != 0; } + static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); } static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; } // Intel features diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/cpu/x86/vm/x86.ad --- a/hotspot/src/cpu/x86/vm/x86.ad Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/cpu/x86/vm/x86.ad Wed Jun 07 13:09:46 2017 -0700 @@ -10520,3 +10520,161 @@ ins_pipe( pipe_slow ); %} +// --------------------------------- FMA -------------------------------------- + +// a * b + c +instruct vfma2D_reg(vecX a, vecX b, vecX c) %{ + predicate(UseFMA && n->as_Vector()->length() == 2); + match(Set c (FmaVD c (Binary a b))); + format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %} + ins_cost(150); + ins_encode %{ + int vector_len = 0; + __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// a * b + c +instruct vfma2D_mem(vecX a, memory b, vecX c) %{ + predicate(UseFMA && n->as_Vector()->length() == 2); + match(Set c (FmaVD c (Binary a (LoadVector 
b)))); + format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %} + ins_cost(150); + ins_encode %{ + int vector_len = 0; + __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + + +// a * b + c +instruct vfma4D_reg(vecY a, vecY b, vecY c) %{ + predicate(UseFMA && n->as_Vector()->length() == 4); + match(Set c (FmaVD c (Binary a b))); + format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %} + ins_cost(150); + ins_encode %{ + int vector_len = 1; + __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// a * b + c +instruct vfma4D_mem(vecY a, memory b, vecY c) %{ + predicate(UseFMA && n->as_Vector()->length() == 4); + match(Set c (FmaVD c (Binary a (LoadVector b)))); + format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %} + ins_cost(150); + ins_encode %{ + int vector_len = 1; + __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// a * b + c +instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{ + predicate(UseFMA && n->as_Vector()->length() == 8); + match(Set c (FmaVD c (Binary a b))); + format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %} + ins_cost(150); + ins_encode %{ + int vector_len = 2; + __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// a * b + c +instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{ + predicate(UseFMA && n->as_Vector()->length() == 8); + match(Set c (FmaVD c (Binary a (LoadVector b)))); + format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %} + ins_cost(150); + ins_encode %{ + int vector_len = 2; + __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// a * b + c +instruct vfma4F_reg(vecX a, vecX b, vecX c) %{ + predicate(UseFMA && 
n->as_Vector()->length() == 4); + match(Set c (FmaVF c (Binary a b))); + format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %} + ins_cost(150); + ins_encode %{ + int vector_len = 0; + __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// a * b + c +instruct vfma4F_mem(vecX a, memory b, vecX c) %{ + predicate(UseFMA && n->as_Vector()->length() == 4); + match(Set c (FmaVF c (Binary a (LoadVector b)))); + format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %} + ins_cost(150); + ins_encode %{ + int vector_len = 0; + __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// a * b + c +instruct vfma8F_reg(vecY a, vecY b, vecY c) %{ + predicate(UseFMA && n->as_Vector()->length() == 8); + match(Set c (FmaVF c (Binary a b))); + format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %} + ins_cost(150); + ins_encode %{ + int vector_len = 1; + __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// a * b + c +instruct vfma8F_mem(vecY a, memory b, vecY c) %{ + predicate(UseFMA && n->as_Vector()->length() == 8); + match(Set c (FmaVF c (Binary a (LoadVector b)))); + format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %} + ins_cost(150); + ins_encode %{ + int vector_len = 1; + __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// a * b + c +instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{ + predicate(UseFMA && n->as_Vector()->length() == 16); + match(Set c (FmaVF c (Binary a b))); + format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %} + ins_cost(150); + ins_encode %{ + int vector_len = 2; + __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// a * b + c +instruct 
vfma16F_mem(vecZ a, memory b, vecZ c) %{ + predicate(UseFMA && n->as_Vector()->length() == 16); + match(Set c (FmaVF c (Binary a (LoadVector b)))); + format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %} + ins_cost(150); + ins_encode %{ + int vector_len = 2; + __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/share/vm/adlc/formssel.cpp --- a/hotspot/src/share/vm/adlc/formssel.cpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/share/vm/adlc/formssel.cpp Wed Jun 07 13:09:46 2017 -0700 @@ -4179,6 +4179,7 @@ "URShiftVB","URShiftVS","URShiftVI","URShiftVL", "ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD", "LoadVector","StoreVector", + "FmaVD", "FmaVF", // Next are not supported currently. "PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D", "ExtractB","ExtractUB","ExtractC","ExtractS","ExtractI","ExtractL","ExtractF","ExtractD" diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/share/vm/opto/classes.hpp --- a/hotspot/src/share/vm/opto/classes.hpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/share/vm/opto/classes.hpp Wed Jun 07 13:09:46 2017 -0700 @@ -310,6 +310,8 @@ macro(MulReductionVF) macro(MulVD) macro(MulReductionVD) +macro(FmaVD) +macro(FmaVF) macro(DivVF) macro(DivVD) macro(AbsVF) diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/share/vm/opto/matcher.cpp --- a/hotspot/src/share/vm/opto/matcher.cpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/share/vm/opto/matcher.cpp Wed Jun 07 13:09:46 2017 -0700 @@ -977,7 +977,6 @@ // Use one stack to keep both: child's node/state and parent's node/index MStack mstack(max_stack * 2 * 2); // usually: C->live_nodes() * 2 * 2 mstack.push(n, Visit, NULL, -1); // set NULL as parent to indicate root - while (mstack.is_nonempty()) { C->check_node_count(NodeLimitFudgeFactor, "too many nodes matching instructions"); if (C->failing()) return NULL; @@ -2122,6 
+2121,8 @@ case Op_EncodeISOArray: case Op_FmaD: case Op_FmaF: + case Op_FmaVD: + case Op_FmaVF: set_shared(n); // Force result into register (it will be anyways) break; case Op_ConP: { // Convert pointers above the centerline to NUL @@ -2311,7 +2312,9 @@ break; } case Op_FmaD: - case Op_FmaF: { + case Op_FmaF: + case Op_FmaVD: + case Op_FmaVF: { // Restructure into a binary tree for Matching. Node* pair = new BinaryNode(n->in(1), n->in(2)); n->set_req(2, pair); diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/share/vm/opto/superword.cpp --- a/hotspot/src/share/vm/opto/superword.cpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/share/vm/opto/superword.cpp Wed Jun 07 13:09:46 2017 -0700 @@ -2324,6 +2324,13 @@ const TypeVect* vt = TypeVect::make(bt, vlen); vn = new CMoveVDNode(cc, src1, src2, vt); NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: ", vn->_idx); vn->dump();}) + } else if (opc == Op_FmaD || opc == Op_FmaF) { + // Promote operands to vector + Node* in1 = vector_opd(p, 1); + Node* in2 = vector_opd(p, 2); + Node* in3 = vector_opd(p, 3); + vn = VectorNode::make(opc, in1, in2, in3, vlen, velt_basic_type(n)); + vlen_in_bytes = vn->as_Vector()->length_in_bytes(); } else { if (do_reserve_copy()) { NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: ShouldNotReachHere, exiting SuperWord");}) diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/share/vm/opto/vectornode.cpp --- a/hotspot/src/share/vm/opto/vectornode.cpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/share/vm/opto/vectornode.cpp Wed Jun 07 13:09:46 2017 -0700 @@ -86,6 +86,12 @@ case Op_MulD: assert(bt == T_DOUBLE, "must be"); return Op_MulVD; + case Op_FmaD: + assert(bt == T_DOUBLE, "must be"); + return Op_FmaVD; + case Op_FmaF: + assert(bt == T_FLOAT, "must be"); + return Op_FmaVF; case Op_CMoveD: assert(bt == T_DOUBLE, "must be"); return Op_CMoveVD; @@ -259,6 +265,11 @@ *start = 2; *end = n->req(); break; + case 
Op_FmaD: + case Op_FmaF: + *start = 1; + *end = 4; // 3 vector operands + break; default: *start = 1; *end = n->req(); // default is all operands @@ -328,6 +339,19 @@ } +VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, BasicType bt) { + const TypeVect* vt = TypeVect::make(bt, vlen); + int vopc = VectorNode::opcode(opc, bt); + // This method should not be called for unimplemented vectors. + guarantee(vopc > 0, "Vector for '%s' is not implemented", NodeClassNames[opc]); + switch (vopc) { + case Op_FmaVD: return new FmaVDNode(n1, n2, n3, vt); + case Op_FmaVF: return new FmaVFNode(n1, n2, n3, vt); + } + fatal("Missed vector creation for '%s'", NodeClassNames[vopc]); + return NULL; +} + // Scalar promotion VectorNode* VectorNode::scalar2vector(Node* s, uint vlen, const Type* opd_t) { BasicType bt = opd_t->array_element_basic_type(); diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/share/vm/opto/vectornode.hpp --- a/hotspot/src/share/vm/opto/vectornode.hpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/share/vm/opto/vectornode.hpp Wed Jun 07 13:09:46 2017 -0700 @@ -62,6 +62,7 @@ static VectorNode* scalar2vector(Node* s, uint vlen, const Type* opd_t); static VectorNode* shift_count(Node* shift, Node* cnt, uint vlen, BasicType bt); static VectorNode* make(int opc, Node* n1, Node* n2, uint vlen, BasicType bt); + static VectorNode* make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, BasicType bt); static int opcode(int opc, BasicType bt); static bool implemented(int opc, uint vlen, BasicType bt); @@ -260,6 +261,22 @@ virtual int Opcode() const; }; +//------------------------------FmaVDNode-------------------------------------- +// Vector fused multiply-add double (a * b + c) +class FmaVDNode : public VectorNode { +public: + FmaVDNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : VectorNode(in1, in2, in3, vt) {} + virtual int Opcode() const; +}; + +//------------------------------FmaVFNode-------------------------------------- +// Vector fused multiply-add float (a * b + c)
+class FmaVFNode : public VectorNode { +public: + FmaVFNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : VectorNode(in1, in2, in3, vt) {} + virtual int Opcode() const; +}; + //------------------------------CMoveVDNode-------------------------------------- // Vector multiply double class CMoveVDNode : public VectorNode { diff -r df19f7e4b9f7 -r cf0da758e7b5 hotspot/src/share/vm/runtime/vmStructs.cpp --- a/hotspot/src/share/vm/runtime/vmStructs.cpp Wed Jun 07 08:56:35 2017 -0700 +++ b/hotspot/src/share/vm/runtime/vmStructs.cpp Wed Jun 07 13:09:46 2017 -0700 @@ -2057,6 +2057,8 @@ declare_c2_type(MulVFNode, VectorNode) \ declare_c2_type(MulReductionVFNode, ReductionNode) \ declare_c2_type(MulVDNode, VectorNode) \ + declare_c2_type(FmaVDNode, VectorNode) \ + declare_c2_type(FmaVFNode, VectorNode) \ declare_c2_type(CMoveVDNode, VectorNode) \ declare_c2_type(MulReductionVDNode, ReductionNode) \ declare_c2_type(DivVFNode, VectorNode) \