--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Mon Dec 12 11:29:51 2016 -0800
@@ -12997,137 +12997,146 @@
ins_pipe(fp_dop_reg_reg_d);
%}
-// We cannot use these fused mul w add/sub ops because they don't
-// produce the same result as the equivalent separated ops
-// (essentially they don't round the intermediate result). that's a
-// shame. leaving them here in case we can idenitfy cases where it is
-// legitimate to use them
-
-
-// instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
-// match(Set dst (AddF (MulF src1 src2) src3));
-
-// format %{ "fmadds $dst, $src1, $src2, $src3" %}
-
-// ins_encode %{
-// __ fmadds(as_FloatRegister($dst$$reg),
-// as_FloatRegister($src1$$reg),
-// as_FloatRegister($src2$$reg),
-// as_FloatRegister($src3$$reg));
-// %}
-
-// ins_pipe(pipe_class_default);
-// %}
-
-// instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
-// match(Set dst (AddD (MulD src1 src2) src3));
-
-// format %{ "fmaddd $dst, $src1, $src2, $src3" %}
-
-// ins_encode %{
-// __ fmaddd(as_FloatRegister($dst$$reg),
-// as_FloatRegister($src1$$reg),
-// as_FloatRegister($src2$$reg),
-// as_FloatRegister($src3$$reg));
-// %}
-
-// ins_pipe(pipe_class_default);
-// %}
-
-// instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
-// match(Set dst (AddF (MulF (NegF src1) src2) src3));
-// match(Set dst (AddF (NegF (MulF src1 src2)) src3));
-
-// format %{ "fmsubs $dst, $src1, $src2, $src3" %}
-
-// ins_encode %{
-// __ fmsubs(as_FloatRegister($dst$$reg),
-// as_FloatRegister($src1$$reg),
-// as_FloatRegister($src2$$reg),
-// as_FloatRegister($src3$$reg));
-// %}
-
-// ins_pipe(pipe_class_default);
-// %}
-
-// instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
-// match(Set dst (AddD (MulD (NegD src1) src2) src3));
-// match(Set dst (AddD (NegD (MulD src1 src2)) src3));
-
-// format %{ "fmsubd $dst, $src1, $src2, $src3" %}
-
-// ins_encode %{
-// __ fmsubd(as_FloatRegister($dst$$reg),
-// as_FloatRegister($src1$$reg),
-// as_FloatRegister($src2$$reg),
-// as_FloatRegister($src3$$reg));
-// %}
-
-// ins_pipe(pipe_class_default);
-// %}
-
-// instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
-// match(Set dst (SubF (MulF (NegF src1) src2) src3));
-// match(Set dst (SubF (NegF (MulF src1 src2)) src3));
-
-// format %{ "fnmadds $dst, $src1, $src2, $src3" %}
-
-// ins_encode %{
-// __ fnmadds(as_FloatRegister($dst$$reg),
-// as_FloatRegister($src1$$reg),
-// as_FloatRegister($src2$$reg),
-// as_FloatRegister($src3$$reg));
-// %}
-
-// ins_pipe(pipe_class_default);
-// %}
-
-// instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
-// match(Set dst (SubD (MulD (NegD src1) src2) src3));
-// match(Set dst (SubD (NegD (MulD src1 src2)) src3));
-
-// format %{ "fnmaddd $dst, $src1, $src2, $src3" %}
-
-// ins_encode %{
-// __ fnmaddd(as_FloatRegister($dst$$reg),
-// as_FloatRegister($src1$$reg),
-// as_FloatRegister($src2$$reg),
-// as_FloatRegister($src3$$reg));
-// %}
-
-// ins_pipe(pipe_class_default);
-// %}
-
-// instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
-// match(Set dst (SubF (MulF src1 src2) src3));
-
-// format %{ "fnmsubs $dst, $src1, $src2, $src3" %}
-
-// ins_encode %{
-// __ fnmsubs(as_FloatRegister($dst$$reg),
-// as_FloatRegister($src1$$reg),
-// as_FloatRegister($src2$$reg),
-// as_FloatRegister($src3$$reg));
-// %}
-
-// ins_pipe(pipe_class_default);
-// %}
-
-// instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
-// match(Set dst (SubD (MulD src1 src2) src3));
-
-// format %{ "fnmsubd $dst, $src1, $src2, $src3" %}
-
-// ins_encode %{
-// // n.b. insn name should be fnmsubd
-// __ fnmsub(as_FloatRegister($dst$$reg),
-// as_FloatRegister($src1$$reg),
-// as_FloatRegister($src2$$reg),
-// as_FloatRegister($src3$$reg));
-// %}
-
-// ins_pipe(pipe_class_default);
-// %}
+// Single-precision fused multiply-add: dst = src1 * src2 + src3 (matches FmaF only when UseFMA is set)
+instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaF src3 (Binary src1 src2)));
+
+ format %{ "fmadds $dst, $src1, $src2, $src3" %}
+
+ ins_encode %{
+ __ fmadds(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// Double-precision fused multiply-add: dst = src1 * src2 + src3 (matches FmaD only when UseFMA is set)
+instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaD src3 (Binary src1 src2)));
+
+ format %{ "fmaddd $dst, $src1, $src2, $src3" %}
+
+ ins_encode %{
+ __ fmaddd(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// Single-precision dst = -src1 * src2 + src3; matches FmaF with either multiplicand negated (UseFMA only)
+instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
+ match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
+
+ format %{ "fmsubs $dst, $src1, $src2, $src3" %}
+
+ ins_encode %{
+ __ fmsubs(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// Double-precision dst = -src1 * src2 + src3; matches FmaD with either multiplicand negated (UseFMA only)
+instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
+ match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
+
+ format %{ "fmsubd $dst, $src1, $src2, $src3" %}
+
+ ins_encode %{
+ __ fmsubd(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// Single-precision dst = -src1 * src2 - src3; matches FmaF with the addend and one multiplicand negated (UseFMA only)
+instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
+ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
+
+ format %{ "fnmadds $dst, $src1, $src2, $src3" %}
+
+ ins_encode %{
+ __ fnmadds(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// Double-precision dst = -src1 * src2 - src3; matches FmaD with the addend and one multiplicand negated (UseFMA only)
+instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
+ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
+
+ format %{ "fnmaddd $dst, $src1, $src2, $src3" %}
+
+ ins_encode %{
+ __ fnmaddd(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// Single-precision dst = src1 * src2 - src3; matches FmaF with only the addend negated. NOTE(review): operand 'zero' is not referenced below
+instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
+ predicate(UseFMA);
+ match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
+
+ format %{ "fnmsubs $dst, $src1, $src2, $src3" %}
+
+ ins_encode %{
+ __ fnmsubs(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// Double-precision dst = src1 * src2 - src3; matches FmaD with only the addend negated. NOTE(review): operand 'zero' is not referenced below
+instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
+ predicate(UseFMA);
+ match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
+
+ format %{ "fnmsubd $dst, $src1, $src2, $src3" %}
+
+ ins_encode %{
+ // n.b. the assembler method called here is named fnmsub; for consistency with the format string it should be fnmsubd
+ __ fnmsub(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
--- a/hotspot/src/cpu/aarch64/vm/abstractInterpreter_aarch64.cpp Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/abstractInterpreter_aarch64.cpp Mon Dec 12 11:29:51 2016 -0800
@@ -65,7 +65,9 @@
case Interpreter::java_lang_math_log10 : // fall thru
case Interpreter::java_lang_math_sqrt : // fall thru
case Interpreter::java_lang_math_pow : // fall thru
- case Interpreter::java_lang_math_exp :
+ case Interpreter::java_lang_math_exp : // fall thru
+ case Interpreter::java_lang_math_fmaD : // fall thru
+ case Interpreter::java_lang_math_fmaF :
return false;
default:
return true;
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Mon Dec 12 11:29:51 2016 -0800
@@ -1055,7 +1055,7 @@
return exact_log2(elem_size);
}
-void LIR_Assembler::emit_op3(LIR_Op3* op) {
+void LIR_Assembler::arithmetic_idiv(LIR_Op3* op, bool is_irem) {
Register Rdividend = op->in_opr1()->as_register();
Register Rdivisor = op->in_opr2()->as_register();
Register Rscratch = op->in_opr3()->as_register();
@@ -1076,12 +1076,31 @@
// convert division by a power of two into some shifts and logical operations
}
- if (op->code() == lir_irem) {
- __ corrected_idivl(Rresult, Rdividend, Rdivisor, true, rscratch1);
- } else if (op->code() == lir_idiv) {
- __ corrected_idivl(Rresult, Rdividend, Rdivisor, false, rscratch1);
- } else
- ShouldNotReachHere();
+ __ corrected_idivl(Rresult, Rdividend, Rdivisor, is_irem, rscratch1);
+}
+
+void LIR_Assembler::emit_op3(LIR_Op3* op) { // emits code for a three-input LIR op, dispatching on its opcode
+ switch (op->code()) {
+ case lir_idiv: // integer division
+ arithmetic_idiv(op, false);
+ break;
+ case lir_irem: // integer remainder: same helper with is_irem = true
+ arithmetic_idiv(op, true);
+ break;
+ case lir_fmad: // double fused multiply-add: result = in_opr1 * in_opr2 + in_opr3
+ __ fmaddd(op->result_opr()->as_double_reg(),
+ op->in_opr1()->as_double_reg(),
+ op->in_opr2()->as_double_reg(),
+ op->in_opr3()->as_double_reg());
+ break;
+ case lir_fmaf: // float fused multiply-add: result = in_opr1 * in_opr2 + in_opr3
+ __ fmadds(op->result_opr()->as_float_reg(),
+ op->in_opr1()->as_float_reg(),
+ op->in_opr2()->as_float_reg(),
+ op->in_opr3()->as_float_reg());
+ break;
+ default: ShouldNotReachHere(); break; // no other opcode is handled here
+ }
}
void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp Mon Dec 12 11:29:51 2016 -0800
@@ -75,6 +75,8 @@
_deopt_handler_size = 7 * NativeInstruction::instruction_size
};
+ void arithmetic_idiv(LIR_Op3* op, bool is_irem); // shared helper for lir_idiv (is_irem = false) and lir_irem (is_irem = true)
+
public:
void store_parameter(Register r, int offset_from_esp_in_words);
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Mon Dec 12 11:29:51 2016 -0800
@@ -1034,7 +1034,26 @@
}
void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
- fatal("FMA intrinsic is not implemented on this platform");
+ assert(x->number_of_arguments() == 3, "wrong type"); // the fma intrinsics take exactly three arguments
+ assert(UseFMA, "Needs FMA instructions support.");
+ LIRItem value(x->argument_at(0), this); // argument 0
+ LIRItem value1(x->argument_at(1), this); // argument 1
+ LIRItem value2(x->argument_at(2), this); // argument 2
+
+ value.load_item(); // presumably materializes each argument as a register operand -- verify against LIRItem
+ value1.load_item();
+ value2.load_item();
+
+ LIR_Opr calc_input = value.result();
+ LIR_Opr calc_input1 = value1.result();
+ LIR_Opr calc_input2 = value2.result();
+ LIR_Opr calc_result = rlock_result(x); // operand that receives the intrinsic's result
+
+ switch (x->id()) { // emit the double or float LIR op, passing the arguments in their original order
+ case vmIntrinsics::_fmaD: __ fmad(calc_input, calc_input1, calc_input2, calc_result); break;
+ case vmIntrinsics::_fmaF: __ fmaf(calc_input, calc_input1, calc_input2, calc_result); break;
+ default: ShouldNotReachHere(); // only _fmaD and _fmaF are handled
+ }
}
void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
--- a/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp Mon Dec 12 11:29:51 2016 -0800
@@ -204,8 +204,25 @@
generate_transcendental_entry(kind, 2);
break;
case Interpreter::java_lang_math_fmaD :
+ if (UseFMA) { // an entry point is generated only when FMA is enabled
+ entry_point = __ pc();
+ __ ldrd(v0, Address(esp, 4 * Interpreter::stackElementSize)); // 1st argument; presumably each double takes two stack slots
+ __ ldrd(v1, Address(esp, 2 * Interpreter::stackElementSize)); // 2nd argument
+ __ ldrd(v2, Address(esp)); // 3rd argument, at the top of the expression stack
+ __ fmaddd(v0, v0, v1, v2); // v0 = v0 * v1 + v2
+ __ mov(sp, r13); // Restore caller's SP
+ }
+ break;
case Interpreter::java_lang_math_fmaF :
- return NULL;
+ if (UseFMA) { // an entry point is generated only when FMA is enabled
+ entry_point = __ pc();
+ __ ldrs(v0, Address(esp, 2 * Interpreter::stackElementSize)); // 1st argument; floats are one stack slot apart
+ __ ldrs(v1, Address(esp, Interpreter::stackElementSize)); // 2nd argument
+ __ ldrs(v2, Address(esp)); // 3rd argument, at the top of the expression stack
+ __ fmadds(v0, v0, v1, v2); // v0 = v0 * v1 + v2
+ __ mov(sp, r13); // Restore caller's SP
+ }
+ break;
default:
;
}
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Mon Dec 12 21:18:54 2016 +0300
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Mon Dec 12 11:29:51 2016 -0800
@@ -262,9 +262,8 @@
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
}
- if (UseFMA) {
- warning("FMA instructions are not available on this CPU");
- FLAG_SET_DEFAULT(UseFMA, false);
+ if (FLAG_IS_DEFAULT(UseFMA)) {
+ FLAG_SET_DEFAULT(UseFMA, true);
}
if (auxv & (HWCAP_SHA1 | HWCAP_SHA2)) {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/floatingpoint/TestFMA.java Mon Dec 12 11:29:51 2016 -0800
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8162338
+ * @summary intrinsify fused mac operations
+ * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement TestFMA
+ *
+ */
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+
+// Calls Math.fma with each combination of negated operands so that every fused multiply-add instruction variant can be generated
+public class TestFMA {
+
+ @Test(args = {5.0F, 10.0F, 7.0F}, res = 57.0F)
+ static float test1(float a, float b, float c) { // a * b + c
+ return Math.fma(a, b, c);
+ }
+
+ @Test(args = {5.0D, 10.0D, 7.0D}, res = 57.0D)
+ static double test2(double a, double b, double c) { // a * b + c
+ return Math.fma(a, b, c);
+ }
+
+ @Test(args = {5.0F, 10.0F, 7.0F}, res = -43.0F)
+ static float test3(float a, float b, float c) { // -a * b + c
+ return Math.fma(-a, b, c);
+ }
+
+ @Test(args = {5.0D, 10.0D, 7.0D}, res = -43.0D)
+ static double test4(double a, double b, double c) { // -a * b + c
+ return Math.fma(-a, b, c);
+ }
+
+ @Test(args = {5.0F, 10.0F, 7.0F}, res = -43.0F)
+ static float test5(float a, float b, float c) { // a * -b + c
+ return Math.fma(a, -b, c);
+ }
+
+ @Test(args = {5.0D, 10.0D, 7.0D}, res = -43.0D)
+ static double test6(double a, double b, double c) { // a * -b + c
+ return Math.fma(a, -b, c);
+ }
+
+ @Test(args = {5.0F, 10.0F, 7.0F}, res = -57.0F)
+ static float test7(float a, float b, float c) { // -a * b - c
+ return Math.fma(-a, b, -c);
+ }
+
+ @Test(args = {5.0D, 10.0D, 7.0D}, res = -57.0D)
+ static double test8(double a, double b, double c) { // -a * b - c
+ return Math.fma(-a, b, -c);
+ }
+
+ @Test(args = {5.0F, 10.0F, 7.0F}, res = -57.0F)
+ static float test9(float a, float b, float c) { // a * -b - c
+ return Math.fma(a, -b, -c);
+ }
+
+ @Test(args = {5.0D, 10.0D, 7.0D}, res = -57.0D)
+ static double test10(double a, double b, double c) { // a * -b - c
+ return Math.fma(a, -b, -c);
+ }
+
+ @Test(args = {5.0F, 10.0F, 7.0F}, res = 43.0F)
+ static float test11(float a, float b, float c) { // a * b - c
+ return Math.fma(a, b, -c);
+ }
+
+ @Test(args = {5.0D, 10.0D, 7.0D}, res = 43.0D)
+ static double test12(double a, double b, double c) { // a * b - c
+ return Math.fma(a, b, -c);
+ }
+
+ static public void main(String[] args) throws Exception { // runs every declared method named test<N>
+ TestFMA t = new TestFMA();
+ for (Method m : t.getClass().getDeclaredMethods()) {
+ if (m.getName().matches("test[0-9]+?")) { // skips main and doTest
+ t.doTest(m);
+ }
+ }
+ }
+
+ @Retention(RetentionPolicy.RUNTIME) // doTest reads the annotation reflectively, so it must be retained at run time
+ @interface Test {
+ double[] args(); // the three arguments passed to the test method
+ double res(); // the exact result the method is expected to return
+ }
+
+ void doTest(Method m) throws Exception { // invokes m repeatedly and fails on the first result that differs from its @Test.res
+ String name = m.getName();
+ System.out.println("Testing " + name);
+ Class retType = m.getReturnType();
+ Test test = m.getAnnotation(Test.class);
+ double[] args = test.args();
+ double expected = test.res();
+
+ for (int i = 0; i < 20000; i++) { // presumably enough invocations for the method to get JIT-compiled -- TODO confirm
+ if (retType == double.class) { // double variant: arguments are passed unchanged
+ Object res = m.invoke(null, (double)args[0], (double)args[1], (double)args[2]);
+ if ((double)res != expected) {
+ throw new RuntimeException(name + " failed : " + (double)res + " != " + expected);
+ }
+ } else { // float variant: arguments and expected value are narrowed to float first
+ Object res = m.invoke(null, (float)args[0], (float)args[1], (float)args[2]);
+ if ((float)res != (float)expected) {
+ throw new RuntimeException(name + " failed : " + (float)res + " != " + (float)expected);
+ }
+ }
+ }
+ }
+}