8162338: AArch64: Intrinsify fused mac operations
author roland
Mon, 12 Dec 2016 11:29:51 -0800
changeset 42653 62a5d76872d4
parent 42652 dc2f4314cb2e
child 42654 6bf23e6fb9ca
8162338: AArch64: Intrinsify fused mac operations Reviewed-by: kvn
hotspot/src/cpu/aarch64/vm/aarch64.ad
hotspot/src/cpu/aarch64/vm/abstractInterpreter_aarch64.cpp
hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp
hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp
hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp
hotspot/test/compiler/floatingpoint/TestFMA.java
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Mon Dec 12 11:29:51 2016 -0800
@@ -12997,137 +12997,146 @@
   ins_pipe(fp_dop_reg_reg_d);
 %}
 
-// We cannot use these fused mul w add/sub ops because they don't
-// produce the same result as the equivalent separated ops
-// (essentially they don't round the intermediate result). that's a
-// shame. leaving them here in case we can idenitfy cases where it is
-// legitimate to use them
-
-
-// instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
-//   match(Set dst (AddF (MulF src1 src2) src3));
-
-//   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
-
-//   ins_encode %{
-//     __ fmadds(as_FloatRegister($dst$$reg),
-//              as_FloatRegister($src1$$reg),
-//              as_FloatRegister($src2$$reg),
-//              as_FloatRegister($src3$$reg));
-//   %}
-
-//   ins_pipe(pipe_class_default);
-// %}
-
-// instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
-//   match(Set dst (AddD (MulD src1 src2) src3));
-
-//   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
-
-//   ins_encode %{
-//     __ fmaddd(as_FloatRegister($dst$$reg),
-//              as_FloatRegister($src1$$reg),
-//              as_FloatRegister($src2$$reg),
-//              as_FloatRegister($src3$$reg));
-//   %}
-
-//   ins_pipe(pipe_class_default);
-// %}
-
-// instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
-//   match(Set dst (AddF (MulF (NegF src1) src2) src3));
-//   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
-
-//   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
-
-//   ins_encode %{
-//     __ fmsubs(as_FloatRegister($dst$$reg),
-//               as_FloatRegister($src1$$reg),
-//               as_FloatRegister($src2$$reg),
-//              as_FloatRegister($src3$$reg));
-//   %}
-
-//   ins_pipe(pipe_class_default);
-// %}
-
-// instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
-//   match(Set dst (AddD (MulD (NegD src1) src2) src3));
-//   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
-
-//   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
-
-//   ins_encode %{
-//     __ fmsubd(as_FloatRegister($dst$$reg),
-//               as_FloatRegister($src1$$reg),
-//               as_FloatRegister($src2$$reg),
-//               as_FloatRegister($src3$$reg));
-//   %}
-
-//   ins_pipe(pipe_class_default);
-// %}
-
-// instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
-//   match(Set dst (SubF (MulF (NegF src1) src2) src3));
-//   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
-
-//   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
-
-//   ins_encode %{
-//     __ fnmadds(as_FloatRegister($dst$$reg),
-//                as_FloatRegister($src1$$reg),
-//                as_FloatRegister($src2$$reg),
-//                as_FloatRegister($src3$$reg));
-//   %}
-
-//   ins_pipe(pipe_class_default);
-// %}
-
-// instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
-//   match(Set dst (SubD (MulD (NegD src1) src2) src3));
-//   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
-
-//   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
-
-//   ins_encode %{
-//     __ fnmaddd(as_FloatRegister($dst$$reg),
-//                as_FloatRegister($src1$$reg),
-//                as_FloatRegister($src2$$reg),
-//                as_FloatRegister($src3$$reg));
-//   %}
-
-//   ins_pipe(pipe_class_default);
-// %}
-
-// instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
-//   match(Set dst (SubF (MulF src1 src2) src3));
-
-//   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
-
-//   ins_encode %{
-//     __ fnmsubs(as_FloatRegister($dst$$reg),
-//                as_FloatRegister($src1$$reg),
-//                as_FloatRegister($src2$$reg),
-//                as_FloatRegister($src3$$reg));
-//   %}
-
-//   ins_pipe(pipe_class_default);
-// %}
-
-// instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
-//   match(Set dst (SubD (MulD src1 src2) src3));
-
-//   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
-
-//   ins_encode %{
-//   // n.b. insn name should be fnmsubd
-//     __ fnmsub(as_FloatRegister($dst$$reg),
-//                as_FloatRegister($src1$$reg),
-//                as_FloatRegister($src2$$reg),
-//                as_FloatRegister($src3$$reg));
-//   %}
-
-//   ins_pipe(pipe_class_default);
-// %}
+// Float fused multiply-add, dst = src1 * src2 + src3: matches FmaF when UseFMA is set and emits fmadds.
+instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF src3 (Binary src1 src2)));
+
+  format %{ "fmadds   $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ fmadds(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg),
+             as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Double fused multiply-add, dst = src1 * src2 + src3: matches FmaD when UseFMA is set and emits fmaddd.
+instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD src3 (Binary src1 src2)));
+
+  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ fmaddd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg),
+             as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Float dst = -src1 * src2 + src3: matches FmaF with either multiplicand negated (when UseFMA is set) and emits fmsubs.
+instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
+  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
+
+  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ fmsubs(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg),
+              as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Double dst = -src1 * src2 + src3: matches FmaD with either multiplicand negated (when UseFMA is set) and emits fmsubd.
+instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
+  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
+
+  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ fmsubd(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg),
+              as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Float dst = -src1 * src2 - src3: matches FmaF with src3 and one multiplicand negated (when UseFMA is set) and emits fnmadds.
+instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
+  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
+
+  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ fnmadds(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src1$$reg),
+               as_FloatRegister($src2$$reg),
+               as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Double dst = -src1 * src2 - src3: matches FmaD with src3 and one multiplicand negated (when UseFMA is set) and emits fnmaddd.
+instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
+  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
+
+  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ fnmaddd(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src1$$reg),
+               as_FloatRegister($src2$$reg),
+               as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Float dst = src1 * src2 - src3: matches FmaF with only src3 negated (when UseFMA is set) and emits fnmsubs. NOTE(review): operand 'zero' is declared but not referenced by the match rule or the encoding - confirm it is needed.
+instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
+
+  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ fnmsubs(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src1$$reg),
+               as_FloatRegister($src2$$reg),
+               as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Double dst = src1 * src2 - src3: matches FmaD with only src3 negated (when UseFMA is set). NOTE(review): operand 'zero' is declared but not referenced by the match rule or the encoding - confirm it is needed.
+instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
+
+  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+  // n.b. emitted through the assembler's fnmsub; the insn name should be fnmsubd
+    __ fnmsub(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg),
+              as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
 
 
 instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
--- a/hotspot/src/cpu/aarch64/vm/abstractInterpreter_aarch64.cpp	Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/abstractInterpreter_aarch64.cpp	Mon Dec 12 11:29:51 2016 -0800
@@ -65,7 +65,9 @@
     case Interpreter::java_lang_math_log10   : // fall thru
     case Interpreter::java_lang_math_sqrt    : // fall thru
     case Interpreter::java_lang_math_pow     : // fall thru
-    case Interpreter::java_lang_math_exp     :
+    case Interpreter::java_lang_math_exp     : // fall thru
+    case Interpreter::java_lang_math_fmaD    : // fall thru
+    case Interpreter::java_lang_math_fmaF    :
       return false;
     default:
       return true;
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Mon Dec 12 11:29:51 2016 -0800
@@ -1055,7 +1055,7 @@
   return exact_log2(elem_size);
 }
 
-void LIR_Assembler::emit_op3(LIR_Op3* op) {
+void LIR_Assembler::arithmetic_idiv(LIR_Op3* op, bool is_irem) {
   Register Rdividend = op->in_opr1()->as_register();
   Register Rdivisor  = op->in_opr2()->as_register();
   Register Rscratch  = op->in_opr3()->as_register();
@@ -1076,12 +1076,31 @@
     // convert division by a power of two into some shifts and logical operations
   }
 
-  if (op->code() == lir_irem) {
-    __ corrected_idivl(Rresult, Rdividend, Rdivisor, true, rscratch1);
-   } else if (op->code() == lir_idiv) {
-    __ corrected_idivl(Rresult, Rdividend, Rdivisor, false, rscratch1);
-  } else
-    ShouldNotReachHere();
+  __ corrected_idivl(Rresult, Rdividend, Rdivisor, is_irem, rscratch1);
+}
+
+void LIR_Assembler::emit_op3(LIR_Op3* op) {  // emit code for a three-input LIR op, selected by op->code()
+  switch (op->code()) {
+  case lir_idiv:
+    arithmetic_idiv(op, false);  // is_irem == false: integer division
+    break;
+  case lir_irem:
+    arithmetic_idiv(op, true);  // is_irem == true: integer remainder
+    break;
+  case lir_fmad:  // double fused multiply-add: result = in_opr1 * in_opr2 + in_opr3
+    __ fmaddd(op->result_opr()->as_double_reg(),
+              op->in_opr1()->as_double_reg(),
+              op->in_opr2()->as_double_reg(),
+              op->in_opr3()->as_double_reg());
+    break;
+  case lir_fmaf:  // float fused multiply-add: result = in_opr1 * in_opr2 + in_opr3
+    __ fmadds(op->result_opr()->as_float_reg(),
+              op->in_opr1()->as_float_reg(),
+              op->in_opr2()->as_float_reg(),
+              op->in_opr3()->as_float_reg());
+    break;
+  default:      ShouldNotReachHere(); break;  // any other opcode is unexpected here
+  }
+}
 
 void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp	Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp	Mon Dec 12 11:29:51 2016 -0800
@@ -75,6 +75,8 @@
     _deopt_handler_size = 7 * NativeInstruction::instruction_size
   };
 
+  void arithmetic_idiv(LIR_Op3* op, bool is_irem);  // shared emitter for lir_idiv / lir_irem; is_irem selects the remainder
+
 public:
 
   void store_parameter(Register r, int offset_from_esp_in_words);
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Mon Dec 12 11:29:51 2016 -0800
@@ -1034,7 +1034,26 @@
 }
 
 void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
-  fatal("FMA intrinsic is not implemented on this platform");
+  assert(x->number_of_arguments() == 3, "wrong type");
+  assert(UseFMA, "Needs FMA instructions support.");
+  LIRItem value(x->argument_at(0), this);  // one LIRItem per intrinsic argument, in argument order
+  LIRItem value1(x->argument_at(1), this);
+  LIRItem value2(x->argument_at(2), this);
+
+  value.load_item();  // load all three operands before reading their result() below
+  value1.load_item();
+  value2.load_item();
+
+  LIR_Opr calc_input = value.result();
+  LIR_Opr calc_input1 = value1.result();
+  LIR_Opr calc_input2 = value2.result();
+  LIR_Opr calc_result = rlock_result(x);
+
+  switch (x->id()) {  // choose the double or float LIR op from the intrinsic id
+  case vmIntrinsics::_fmaD:   __ fmad(calc_input, calc_input1, calc_input2, calc_result); break;
+  case vmIntrinsics::_fmaF:   __ fmaf(calc_input, calc_input1, calc_input2, calc_result); break;
+  default:                    ShouldNotReachHere();  // only _fmaD and _fmaF are expected
+  }
 }
 
 void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
--- a/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp	Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp	Mon Dec 12 11:29:51 2016 -0800
@@ -204,8 +204,25 @@
     generate_transcendental_entry(kind, 2);
     break;
   case Interpreter::java_lang_math_fmaD :
+    if (UseFMA) {
+      entry_point = __ pc();
+      __ ldrd(v0, Address(esp, 4 * Interpreter::stackElementSize));
+      __ ldrd(v1, Address(esp, 2 * Interpreter::stackElementSize));
+      __ ldrd(v2, Address(esp));
+      __ fmaddd(v0, v0, v1, v2);
+      __ mov(sp, r13); // Restore caller's SP
+    }
+    break;
   case Interpreter::java_lang_math_fmaF :
-    return NULL;
+    if (UseFMA) {
+      entry_point = __ pc();
+      __ ldrs(v0, Address(esp, 2 * Interpreter::stackElementSize));
+      __ ldrs(v1, Address(esp, Interpreter::stackElementSize));
+      __ ldrs(v2, Address(esp));
+      __ fmadds(v0, v0, v1, v2);
+      __ mov(sp, r13); // Restore caller's SP
+    }
+    break;
   default:
     ;
   }
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Mon Dec 12 11:29:51 2016 -0800
@@ -262,9 +262,8 @@
     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
   }
 
-  if (UseFMA) {
-    warning("FMA instructions are not available on this CPU");
-    FLAG_SET_DEFAULT(UseFMA, false);
+  if (FLAG_IS_DEFAULT(UseFMA)) {
+    FLAG_SET_DEFAULT(UseFMA, true);
   }
 
   if (auxv & (HWCAP_SHA1 | HWCAP_SHA2)) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/floatingpoint/TestFMA.java	Mon Dec 12 11:29:51 2016 -0800
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8162338
+ * @summary intrinsify fused mac operations
+ * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement TestFMA
+ *
+ */
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+
+// Calls Math.fma in every sign combination (plain, negated multiplicand, negated addend, both) for float and double, checking each result against its @Test annotation.
+public class TestFMA {
+
+    @Test(args = {5.0F, 10.0F, 7.0F}, res = 57.0F)
+    static float test1(float a, float b, float c) {
+        return Math.fma(a, b, c);  // a * b + c (madd shape)
+    }
+
+    @Test(args = {5.0D, 10.0D, 7.0D}, res = 57.0D)
+    static double test2(double a, double b, double c) {
+        return Math.fma(a, b, c);  // a * b + c (madd shape)
+    }
+
+    @Test(args = {5.0F, 10.0F, 7.0F}, res = -43.0F)
+    static float test3(float a, float b, float c) {
+        return Math.fma(-a, b, c);  // -a * b + c (msub shape, first multiplicand negated)
+    }
+
+    @Test(args = {5.0D, 10.0D, 7.0D}, res = -43.0D)
+    static double test4(double a, double b, double c) {
+        return Math.fma(-a, b, c);  // -a * b + c (msub shape, first multiplicand negated)
+    }
+
+    @Test(args = {5.0F, 10.0F, 7.0F}, res = -43.0F)
+    static float test5(float a, float b, float c) {
+        return Math.fma(a, -b, c);  // a * -b + c (msub shape, second multiplicand negated)
+    }
+
+    @Test(args = {5.0D, 10.0D, 7.0D}, res = -43.0D)
+    static double test6(double a, double b, double c) {
+        return Math.fma(a, -b, c);  // a * -b + c (msub shape, second multiplicand negated)
+    }
+
+    @Test(args = {5.0F, 10.0F, 7.0F}, res = -57.0F)
+    static float test7(float a, float b, float c) {
+        return Math.fma(-a, b, -c);  // -a * b - c (mnadd shape, first multiplicand negated)
+    }
+
+    @Test(args = {5.0D, 10.0D, 7.0D}, res = -57.0D)
+    static double test8(double a, double b, double c) {
+        return Math.fma(-a, b, -c);  // -a * b - c (mnadd shape, first multiplicand negated)
+    }
+
+    @Test(args = {5.0F, 10.0F, 7.0F}, res = -57.0F)
+    static float test9(float a, float b, float c) {
+        return Math.fma(a, -b, -c);  // a * -b - c (mnadd shape, second multiplicand negated)
+    }
+
+    @Test(args = {5.0D, 10.0D, 7.0D}, res = -57.0D)
+    static double test10(double a, double b, double c) {
+        return Math.fma(a, -b, -c);  // a * -b - c (mnadd shape, second multiplicand negated)
+    }
+
+    @Test(args = {5.0F, 10.0F, 7.0F}, res = 43.0F)
+    static float test11(float a, float b, float c) {
+        return Math.fma(a, b, -c);  // a * b - c (mnsub shape)
+    }
+
+    @Test(args = {5.0D, 10.0D, 7.0D}, res = 43.0D)
+    static double test12(double a, double b, double c) {
+        return Math.fma(a, b, -c);  // a * b - c (mnsub shape)
+    }
+
+    static public void main(String[] args) throws Exception {
+        TestFMA t = new TestFMA();
+        for (Method m : t.getClass().getDeclaredMethods()) {
+            if (m.getName().matches("test[0-9]+?")) {  // run only the testN methods
+                t.doTest(m);
+            }
+        }
+    }
+
+    @Retention(RetentionPolicy.RUNTIME)
+    @interface Test {  // args: the three fma operands (held as doubles); res: the expected result
+        double[] args();
+        double res();
+    }
+
+    void doTest(Method m) throws Exception {  // invokes m reflectively and compares each result with the annotation's res
+        String name = m.getName();
+        System.out.println("Testing " + name);
+        Class retType = m.getReturnType();
+        Test test = m.getAnnotation(Test.class);
+        double[] args = test.args();
+        double expected = test.res();
+
+        for (int i = 0; i < 20000; i++) {  // repeated many times, presumably so the method gets JIT-compiled - TODO confirm
+            if (retType == double.class) {
+                Object res = m.invoke(null, (double)args[0], (double)args[1], (double)args[2]);
+                if ((double)res != expected) {
+                    throw new RuntimeException(name + " failed : " + (double)res + " != " + expected);
+                }
+            } else {  // every non-double test returns float: narrow the annotation's double values
+                Object res = m.invoke(null, (float)args[0], (float)args[1], (float)args[2]);
+                if ((float)res != (float)expected) {
+                    throw new RuntimeException(name + " failed : " + (float)res + " != " + (float)expected);
+                }
+            }
+        }
+    }
+}