8171244: PPC64: Make interpreter's math entries consistent with C1 and C2 and support FMA
authormdoerr
Thu, 15 Dec 2016 14:24:04 +0100
changeset 42884 05815125c157
parent 42883 5495adce1e0d
child 42885 d85e387adbb6
8171244: PPC64: Make interpreter's math entries consistent with C1 and C2 and support FMA Reviewed-by: kvn, goetz
hotspot/src/cpu/ppc/vm/abstractInterpreter_ppc.cpp
hotspot/src/cpu/ppc/vm/assembler_ppc.hpp
hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp
hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp
hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp
hotspot/src/cpu/ppc/vm/ppc.ad
hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp
hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp
hotspot/src/share/vm/interpreter/abstractInterpreter.hpp
--- a/hotspot/src/cpu/ppc/vm/abstractInterpreter_ppc.cpp	Tue Dec 13 17:45:13 2016 +0300
+++ b/hotspot/src/cpu/ppc/vm/abstractInterpreter_ppc.cpp	Thu Dec 15 14:24:04 2016 +0100
@@ -51,19 +51,25 @@
   return i;
 }
 
-// Support abs and sqrt like in compiler.
-// For others we can use a normal (native) entry.
-bool AbstractInterpreter::math_entry_available(AbstractInterpreter::MethodKind kind) {
-  if (!InlineIntrinsics) return false;
-
-  return ((kind==Interpreter::java_lang_math_sqrt && VM_Version::has_fsqrt()) ||
-          (kind==Interpreter::java_lang_math_abs));
-}
-
 // These should never be compiled since the interpreter will prefer
 // the compiled version to the intrinsic version.
 bool AbstractInterpreter::can_be_compiled(methodHandle m) {
-  return !math_entry_available(method_kind(m));
+  switch (method_kind(m)) {
+    case Interpreter::java_lang_math_sin     : // fall thru
+    case Interpreter::java_lang_math_cos     : // fall thru
+    case Interpreter::java_lang_math_tan     : // fall thru
+    case Interpreter::java_lang_math_abs     : // fall thru
+    case Interpreter::java_lang_math_log     : // fall thru
+    case Interpreter::java_lang_math_log10   : // fall thru
+    case Interpreter::java_lang_math_sqrt    : // fall thru
+    case Interpreter::java_lang_math_pow     : // fall thru
+    case Interpreter::java_lang_math_exp     : // fall thru
+    case Interpreter::java_lang_math_fmaD    : // fall thru
+    case Interpreter::java_lang_math_fmaF    :
+      return false;
+    default:
+      return true;
+  }
 }
 
 // How much stack a method activation needs in stack slots.
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp	Tue Dec 13 17:45:13 2016 +0300
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp	Thu Dec 15 14:24:04 2016 +0100
@@ -460,16 +460,15 @@
     FCTIWZ_OPCODE  = (63u << OPCODE_SHIFT |   15u << 1),
     FRSP_OPCODE    = (63u << OPCODE_SHIFT |   12u << 1),
 
-    // WARNING: using fmadd results in a non-compliant vm. Some floating
-    // point tck tests will fail.
-    FMADD_OPCODE   = (59u << OPCODE_SHIFT |   29u << 1),
-    DMADD_OPCODE   = (63u << OPCODE_SHIFT |   29u << 1),
-    FMSUB_OPCODE   = (59u << OPCODE_SHIFT |   28u << 1),
-    DMSUB_OPCODE   = (63u << OPCODE_SHIFT |   28u << 1),
-    FNMADD_OPCODE  = (59u << OPCODE_SHIFT |   31u << 1),
-    DNMADD_OPCODE  = (63u << OPCODE_SHIFT |   31u << 1),
-    FNMSUB_OPCODE  = (59u << OPCODE_SHIFT |   30u << 1),
-    DNMSUB_OPCODE  = (63u << OPCODE_SHIFT |   30u << 1),
+    // Fused multiply-accumulate instructions.
+    FMADD_OPCODE   = (63u << OPCODE_SHIFT |   29u << 1),
+    FMADDS_OPCODE  = (59u << OPCODE_SHIFT |   29u << 1),
+    FMSUB_OPCODE   = (63u << OPCODE_SHIFT |   28u << 1),
+    FMSUBS_OPCODE  = (59u << OPCODE_SHIFT |   28u << 1),
+    FNMADD_OPCODE  = (63u << OPCODE_SHIFT |   31u << 1),
+    FNMADDS_OPCODE = (59u << OPCODE_SHIFT |   31u << 1),
+    FNMSUB_OPCODE  = (63u << OPCODE_SHIFT |   30u << 1),
+    FNMSUBS_OPCODE = (59u << OPCODE_SHIFT |   30u << 1),
 
     LFD_OPCODE     = (50u << OPCODE_SHIFT |   00u << 1),
     LFDU_OPCODE    = (51u << OPCODE_SHIFT |   00u << 1),
@@ -1939,6 +1938,26 @@
   inline void fdivs( FloatRegister d, FloatRegister a, FloatRegister b);
   inline void fdivs_(FloatRegister d, FloatRegister a, FloatRegister b);
 
+  // Fused multiply-accumulate instructions.
+  // WARNING: Use only when rounding between the 2 parts is not desired.
+  // Some floating point tck tests will fail if used incorrectly.
+  inline void fmadd(   FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fmadd_(  FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fmadds(  FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fmadds_( FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fmsub(   FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fmsub_(  FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fmsubs(  FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fmsubs_( FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fnmadd(  FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fnmadd_( FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fnmadds( FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fnmadds_(FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fnmsub(  FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fnmsub_( FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fnmsubs( FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+  inline void fnmsubs_(FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b);
+
   // PPC 1, section 4.6.6 Floating-Point Rounding and Conversion Instructions
   inline void frsp(  FloatRegister d, FloatRegister b);
   inline void fctid( FloatRegister d, FloatRegister b);
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Tue Dec 13 17:45:13 2016 +0300
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Thu Dec 15 14:24:04 2016 +0100
@@ -700,6 +700,26 @@
 inline void Assembler::fdivs( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIVS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); }
 inline void Assembler::fdivs_(FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIVS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); }
 
+// Fused multiply-accumulate instructions.
+// WARNING: Use only when rounding between the 2 parts is not desired.
+// Some floating point tck tests will fail if used incorrectly.
+inline void Assembler::fmadd(   FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FMADD_OPCODE   | frt(d) | fra(a) | frb(b) | frc(c) | rc(0)); }
+inline void Assembler::fmadd_(  FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FMADD_OPCODE   | frt(d) | fra(a) | frb(b) | frc(c) | rc(1)); }
+inline void Assembler::fmadds(  FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FMADDS_OPCODE  | frt(d) | fra(a) | frb(b) | frc(c) | rc(0)); }
+inline void Assembler::fmadds_( FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FMADDS_OPCODE  | frt(d) | fra(a) | frb(b) | frc(c) | rc(1)); }
+inline void Assembler::fmsub(   FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FMSUB_OPCODE   | frt(d) | fra(a) | frb(b) | frc(c) | rc(0)); }
+inline void Assembler::fmsub_(  FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FMSUB_OPCODE   | frt(d) | fra(a) | frb(b) | frc(c) | rc(1)); }
+inline void Assembler::fmsubs(  FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FMSUBS_OPCODE  | frt(d) | fra(a) | frb(b) | frc(c) | rc(0)); }
+inline void Assembler::fmsubs_( FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FMSUBS_OPCODE  | frt(d) | fra(a) | frb(b) | frc(c) | rc(1)); }
+inline void Assembler::fnmadd(  FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FNMADD_OPCODE  | frt(d) | fra(a) | frb(b) | frc(c) | rc(0)); }
+inline void Assembler::fnmadd_( FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FNMADD_OPCODE  | frt(d) | fra(a) | frb(b) | frc(c) | rc(1)); }
+inline void Assembler::fnmadds( FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FNMADDS_OPCODE | frt(d) | fra(a) | frb(b) | frc(c) | rc(0)); }
+inline void Assembler::fnmadds_(FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FNMADDS_OPCODE | frt(d) | fra(a) | frb(b) | frc(c) | rc(1)); }
+inline void Assembler::fnmsub(  FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FNMSUB_OPCODE  | frt(d) | fra(a) | frb(b) | frc(c) | rc(0)); }
+inline void Assembler::fnmsub_( FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FNMSUB_OPCODE  | frt(d) | fra(a) | frb(b) | frc(c) | rc(1)); }
+inline void Assembler::fnmsubs( FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FNMSUBS_OPCODE | frt(d) | fra(a) | frb(b) | frc(c) | rc(0)); }
+inline void Assembler::fnmsubs_(FloatRegister d, FloatRegister a, FloatRegister c, FloatRegister b) { emit_int32( FNMSUBS_OPCODE | frt(d) | fra(a) | frb(b) | frc(c) | rc(1)); }
+
 // PPC 1, section 4.6.6 Floating-Point Rounding and Conversion Instructions
 inline void Assembler::frsp(  FloatRegister d, FloatRegister b) { emit_int32( FRSP_OPCODE   | frt(d) | frb(b) | rc(0)); }
 inline void Assembler::fctid( FloatRegister d, FloatRegister b) { emit_int32( FCTID_OPCODE  | frt(d) | frb(b) | rc(0)); }
--- a/hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp	Tue Dec 13 17:45:13 2016 +0300
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp	Thu Dec 15 14:24:04 2016 +0100
@@ -292,28 +292,28 @@
 }
 
 
-void LIR_Assembler::emit_op3(LIR_Op3* op) {
-  const bool is_int = op->result_opr()->is_single_cpu();
-  Register Rdividend = is_int ? op->in_opr1()->as_register() : op->in_opr1()->as_register_lo();
+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) {
+  const bool is_int = result->is_single_cpu();
+  Register Rdividend = is_int ? left->as_register() : left->as_register_lo();
   Register Rdivisor  = noreg;
-  Register Rscratch  = op->in_opr3()->as_register();
-  Register Rresult   = is_int ? op->result_opr()->as_register() : op->result_opr()->as_register_lo();
+  Register Rscratch  = temp->as_register();
+  Register Rresult   = is_int ? result->as_register() : result->as_register_lo();
   long divisor = -1;
 
-  if (op->in_opr2()->is_register()) {
-    Rdivisor = is_int ? op->in_opr2()->as_register() : op->in_opr2()->as_register_lo();
+  if (right->is_register()) {
+    Rdivisor = is_int ? right->as_register() : right->as_register_lo();
   } else {
-    divisor = is_int ? op->in_opr2()->as_constant_ptr()->as_jint()
-                     : op->in_opr2()->as_constant_ptr()->as_jlong();
+    divisor = is_int ? right->as_constant_ptr()->as_jint()
+                     : right->as_constant_ptr()->as_jlong();
   }
 
   assert(Rdividend != Rscratch, "");
   assert(Rdivisor  != Rscratch, "");
-  assert(op->code() == lir_idiv || op->code() == lir_irem, "Must be irem or idiv");
+  assert(code == lir_idiv || code == lir_irem, "Must be irem or idiv");
 
   if (Rdivisor == noreg) {
     if (divisor == 1) { // stupid, but can happen
-      if (op->code() == lir_idiv) {
+      if (code == lir_idiv) {
         __ mr_if_needed(Rresult, Rdividend);
       } else {
         __ li(Rresult, 0);
@@ -340,7 +340,7 @@
       }
       __ add(Rscratch, Rdividend, Rscratch);
 
-      if (op->code() == lir_idiv) {
+      if (code == lir_idiv) {
         if (is_int) {
           __ srawi(Rresult, Rscratch, log2);
         } else {
@@ -352,7 +352,7 @@
       }
 
     } else if (divisor == -1) {
-      if (op->code() == lir_idiv) {
+      if (code == lir_idiv) {
         __ neg(Rresult, Rdividend);
       } else {
         __ li(Rresult, 0);
@@ -360,7 +360,7 @@
 
     } else {
       __ load_const_optimized(Rscratch, divisor);
-      if (op->code() == lir_idiv) {
+      if (code == lir_idiv) {
         if (is_int) {
           __ divw(Rresult, Rdividend, Rscratch); // Can't divide minint/-1.
         } else {
@@ -389,7 +389,7 @@
     __ cmpdi(CCR0, Rdivisor, -1);
   }
   __ bne(CCR0, regular);
-  if (op->code() == lir_idiv) {
+  if (code == lir_idiv) {
     __ neg(Rresult, Rdividend);
     __ b(done);
     __ bind(regular);
@@ -415,6 +415,26 @@
 }
 
 
+void LIR_Assembler::emit_op3(LIR_Op3* op) {
+  switch (op->code()) {
+  case lir_idiv:
+  case lir_irem:
+    arithmetic_idiv(op->code(), op->in_opr1(), op->in_opr2(), op->in_opr3(),
+                    op->result_opr(), op->info());
+    break;
+  case lir_fmad:
+    __ fmadd(op->result_opr()->as_double_reg(), op->in_opr1()->as_double_reg(),
+             op->in_opr2()->as_double_reg(), op->in_opr3()->as_double_reg());
+    break;
+  case lir_fmaf:
+    __ fmadds(op->result_opr()->as_float_reg(), op->in_opr1()->as_float_reg(),
+              op->in_opr2()->as_float_reg(), op->in_opr3()->as_float_reg());
+    break;
+  default: ShouldNotReachHere(); break;
+  }
+}
+
+
 void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
 #ifdef ASSERT
   assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
--- a/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Tue Dec 13 17:45:13 2016 +0300
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Thu Dec 15 14:24:04 2016 +0100
@@ -1435,7 +1435,26 @@
 }
 
 void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
-  fatal("FMA intrinsic is not implemented on this platform");
+  assert(x->number_of_arguments() == 3, "wrong type");
+  assert(UseFMA, "Needs FMA instructions support.");
+  LIRItem value(x->argument_at(0), this);
+  LIRItem value1(x->argument_at(1), this);
+  LIRItem value2(x->argument_at(2), this);
+
+  value.load_item();
+  value1.load_item();
+  value2.load_item();
+
+  LIR_Opr calc_input = value.result();
+  LIR_Opr calc_input1 = value1.result();
+  LIR_Opr calc_input2 = value2.result();
+  LIR_Opr calc_result = rlock_result(x);
+
+  switch (x->id()) {
+  case vmIntrinsics::_fmaD: __ fmad(calc_input, calc_input1, calc_input2, calc_result); break;
+  case vmIntrinsics::_fmaF: __ fmaf(calc_input, calc_input1, calc_input2, calc_result); break;
+  default:                  ShouldNotReachHere();
+  }
 }
 
 void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
--- a/hotspot/src/cpu/ppc/vm/ppc.ad	Tue Dec 13 17:45:13 2016 +0300
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad	Thu Dec 15 14:24:04 2016 +0100
@@ -9569,6 +9569,117 @@
   ins_pipe(pipe_class_default);
 %}
 
+
+// Multiply-Accumulate
+// src1 * src2 + src3
+instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
+  match(Set dst (FmaF src3 (Binary src1 src2)));
+
+  format %{ "FMADDS  $dst, $src1, $src2, $src3" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_fmadds);
+    __ fmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// src1 * src2 + src3
+instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
+  match(Set dst (FmaD src3 (Binary src1 src2)));
+
+  format %{ "FMADD   $dst, $src1, $src2, $src3" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_fmadd);
+    __ fmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// -src1 * src2 + src3 = -(src1*src2-src3)
+instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
+  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
+  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
+
+  format %{ "FNMSUBS $dst, $src1, $src2, $src3" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_fnmsubs);
+    __ fnmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// -src1 * src2 + src3 = -(src1*src2-src3)
+instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
+  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
+  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
+
+  format %{ "FNMSUB  $dst, $src1, $src2, $src3" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_fnmsub);
+    __ fnmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// -src1 * src2 - src3 = -(src1*src2+src3)
+instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
+  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
+  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
+
+  format %{ "FNMADDS $dst, $src1, $src2, $src3" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_fnmadds);
+    __ fnmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// -src1 * src2 - src3 = -(src1*src2+src3)
+instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
+  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
+  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
+
+  format %{ "FNMADD  $dst, $src1, $src2, $src3" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_fnmadd);
+    __ fnmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// src1 * src2 - src3
+instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
+  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
+
+  format %{ "FMSUBS  $dst, $src1, $src2, $src3" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_fmsubs);
+    __ fmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// src1 * src2 - src3
+instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
+  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
+
+  format %{ "FMSUB   $dst, $src1, $src2, $src3" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_fmsub);
+    __ fmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+
 //----------Logical Instructions-----------------------------------------------
 
 // And Instructions
--- a/hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp	Tue Dec 13 17:45:13 2016 +0300
+++ b/hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp	Thu Dec 15 14:24:04 2016 +0100
@@ -1134,14 +1134,57 @@
 // End of helpers
 
 address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
-  if (!Interpreter::math_entry_available(kind)) {
-    NOT_PRODUCT(__ should_not_reach_here();)
-    return NULL;
+
+  // Decide what to do: Use same platform specific instructions and runtime calls as compilers.
+  bool use_instruction = false;
+  address runtime_entry = NULL;
+  int num_args = 1;
+  bool double_precision = true;
+
+  // PPC64 specific:
+  switch (kind) {
+    case Interpreter::java_lang_math_sqrt: use_instruction = VM_Version::has_fsqrt(); break;
+    case Interpreter::java_lang_math_abs:  use_instruction = true; break;
+    case Interpreter::java_lang_math_fmaF:
+    case Interpreter::java_lang_math_fmaD: use_instruction = UseFMA; break;
+    default: break; // Fall back to runtime call.
   }
 
+  switch (kind) {
+    case Interpreter::java_lang_math_sin  : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);   break;
+    case Interpreter::java_lang_math_cos  : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);   break;
+    case Interpreter::java_lang_math_tan  : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);   break;
+    case Interpreter::java_lang_math_abs  : /* run interpreted */ break;
+    case Interpreter::java_lang_math_sqrt : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt);  break;
+    case Interpreter::java_lang_math_log  : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);   break;
+    case Interpreter::java_lang_math_log10: runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); break;
+    case Interpreter::java_lang_math_pow  : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); num_args = 2; break;
+    case Interpreter::java_lang_math_exp  : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);   break;
+    case Interpreter::java_lang_math_fmaF : /* run interpreted */ num_args = 3; double_precision = false; break;
+    case Interpreter::java_lang_math_fmaD : /* run interpreted */ num_args = 3; break;
+    default: ShouldNotReachHere();
+  }
+
+  // Use normal entry if neither instruction nor runtime call is used.
+  if (!use_instruction && runtime_entry == NULL) return NULL;
+
   address entry = __ pc();
 
-  __ lfd(F1_RET, Interpreter::stackElementSize, R15_esp);
+  // Load arguments
+  assert(num_args <= 13, "passed in registers");
+  if (double_precision) {
+    int offset = (2 * num_args - 1) * Interpreter::stackElementSize;
+    for (int i = 0; i < num_args; ++i) {
+      __ lfd(as_FloatRegister(F1_ARG1->encoding() + i), offset, R15_esp);
+      offset -= 2 * Interpreter::stackElementSize;
+    }
+  } else {
+    int offset = num_args * Interpreter::stackElementSize;
+    for (int i = 0; i < num_args; ++i) {
+      __ lfs(as_FloatRegister(F1_ARG1->encoding() + i), offset, R15_esp);
+      offset -= Interpreter::stackElementSize;
+    }
+  }
 
   // Pop c2i arguments (if any) off when we return.
 #ifdef ASSERT
@@ -1152,15 +1195,30 @@
 #endif // ASSERT
   __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
 
-  if (kind == Interpreter::java_lang_math_sqrt) {
-    __ fsqrt(F1_RET, F1_RET);
-  } else if (kind == Interpreter::java_lang_math_abs) {
-    __ fabs(F1_RET, F1_RET);
+  if (use_instruction) {
+    switch (kind) {
+      case Interpreter::java_lang_math_sqrt: __ fsqrt(F1_RET, F1);          break;
+      case Interpreter::java_lang_math_abs:  __ fabs(F1_RET, F1);           break;
+      case Interpreter::java_lang_math_fmaF: __ fmadds(F1_RET, F1, F2, F3); break;
+      case Interpreter::java_lang_math_fmaD: __ fmadd(F1_RET, F1, F2, F3);  break;
+      default: ShouldNotReachHere();
+    }
   } else {
-    ShouldNotReachHere();
+    // Comment: Can use tail call if the unextended frame is always C ABI compliant:
+    //__ load_const_optimized(R12_scratch2, runtime_entry, R0);
+    //__ call_c_and_return_to_caller(R12_scratch2);
+
+    // Push a new C frame and save LR.
+    __ save_LR_CR(R0);
+    __ push_frame_reg_args(0, R11_scratch1);
+
+    __ call_VM_leaf(runtime_entry);
+
+    // Pop the C frame and restore LR.
+    __ pop_frame();
+    __ restore_LR_CR(R0);
   }
 
-  // And we're done.
   __ blr();
 
   __ flush();
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp	Tue Dec 13 17:45:13 2016 +0300
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp	Thu Dec 15 14:24:04 2016 +0100
@@ -230,9 +230,8 @@
     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
   }
 
-  if (UseFMA) {
-    warning("FMA instructions are not available on this CPU");
-    FLAG_SET_DEFAULT(UseFMA, false);
+  if (FLAG_IS_DEFAULT(UseFMA)) {
+    FLAG_SET_DEFAULT(UseFMA, true);
   }
 
   if (UseSHA) {
--- a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp	Tue Dec 13 17:45:13 2016 +0300
+++ b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp	Thu Dec 15 14:24:04 2016 +0100
@@ -299,10 +299,6 @@
   }
 
   static void initialize_method_handle_entries();
-
-  // PPC-only: Support abs and sqrt like in compiler.
-  // For others we can use a normal (native) entry.
-  static bool math_entry_available(MethodKind kind);
 };
 
 //------------------------------------------------------------------------------------------------------------------------