8181809: PPC64: Leverage mtfprd/mffprd on POWER8
author mdoerr
Tue, 04 Jul 2017 15:11:25 +0200
changeset 46615 3fd9b25850f4
parent 46614 ae1105fff9e4
child 46616 66d452cca30f
8181809: PPC64: Leverage mtfprd/mffprd on POWER8
Reviewed-by: mdoerr, simonis
Contributed-by: Matthew Brandyberry <mbrandy@linux.vnet.ibm.com>
hotspot/src/cpu/ppc/vm/assembler_ppc.hpp
hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp
hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp
hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp
hotspot/src/cpu/ppc/vm/interp_masm_ppc.hpp
hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp
hotspot/src/cpu/ppc/vm/ppc.ad
hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp
hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp	Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp	Tue Jul 04 15:11:25 2017 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -511,6 +511,7 @@
     STXVD2X_OPCODE = (31u << OPCODE_SHIFT |  972u << 1),
     MTVSRD_OPCODE  = (31u << OPCODE_SHIFT |  179u << 1),
     MFVSRD_OPCODE  = (31u << OPCODE_SHIFT |   51u << 1),
+    MTVSRWA_OPCODE = (31u << OPCODE_SHIFT |  211u << 1),
 
     // Vector Permute and Formatting
     VPKPX_OPCODE   = (4u  << OPCODE_SHIFT |  782u     ),
@@ -2138,6 +2139,11 @@
   inline void mtvrd(    VectorRegister  d, Register a);
   inline void mfvrd(    Register        a, VectorRegister d);
 
+  // Vector-Scalar (VSX) instructions.
+  inline void mtfprd(   FloatRegister   d, Register a);
+  inline void mtfprwa(  FloatRegister   d, Register a);
+  inline void mffprd(   Register        a, FloatRegister d);
+
   // AES (introduced with Power 8)
   inline void vcipher(     VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vcipherlast( VectorRegister d, VectorRegister a, VectorRegister b);
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Tue Jul 04 15:11:25 2017 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -765,6 +765,11 @@
 inline void Assembler::mtvrd(  VectorRegister  d, Register a)               { emit_int32( MTVSRD_OPCODE  | vrt(d)  | ra(a)  | 1u); } // 1u: d is treated as Vector (VMX/Altivec).
 inline void Assembler::mfvrd(  Register        a, VectorRegister d)         { emit_int32( MFVSRD_OPCODE  | vrt(d)  | ra(a)  | 1u); } // 1u: d is treated as Vector (VMX/Altivec).
 
+// Vector-Scalar (VSX) instructions.
+inline void Assembler::mtfprd(  FloatRegister   d, Register a)      { emit_int32( MTVSRD_OPCODE  | frt(d)  | ra(a)); }
+inline void Assembler::mtfprwa( FloatRegister   d, Register a)      { emit_int32( MTVSRWA_OPCODE | frt(d)  | ra(a)); }
+inline void Assembler::mffprd(  Register        a, FloatRegister d) { emit_int32( MFVSRD_OPCODE  | frt(d)  | ra(a)); }
+
 inline void Assembler::vpkpx(   VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKPX_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vpkshss( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSHSS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vpkswss( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSWSS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
--- a/hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp	Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp	Tue Jul 04 15:11:25 2017 +0200
@@ -514,25 +514,48 @@
     }
     case Bytecodes::_i2d:
     case Bytecodes::_l2d: {
-      __ fcfid(dst->as_double_reg(), src->as_double_reg()); // via mem
+      bool src_in_memory = !VM_Version::has_mtfprd();
+      FloatRegister rdst = dst->as_double_reg();
+      FloatRegister rsrc;
+      if (src_in_memory) {
+        rsrc = src->as_double_reg(); // via mem
+      } else {
+        // move src to dst register
+        if (code == Bytecodes::_i2d) {
+          __ mtfprwa(rdst, src->as_register());
+        } else {
+          __ mtfprd(rdst, src->as_register_lo());
+        }
+        rsrc = rdst;
+      }
+      __ fcfid(rdst, rsrc);
       break;
     }
-    case Bytecodes::_i2f: {
+    case Bytecodes::_i2f:
+    case Bytecodes::_l2f: {
+      bool src_in_memory = !VM_Version::has_mtfprd();
       FloatRegister rdst = dst->as_float_reg();
-      FloatRegister rsrc = src->as_double_reg(); // via mem
+      FloatRegister rsrc;
+      if (src_in_memory) {
+        rsrc = src->as_double_reg(); // via mem
+      } else {
+        // move src to dst register
+        if (code == Bytecodes::_i2f) {
+          __ mtfprwa(rdst, src->as_register());
+        } else {
+          __ mtfprd(rdst, src->as_register_lo());
+        }
+        rsrc = rdst;
+      }
       if (VM_Version::has_fcfids()) {
         __ fcfids(rdst, rsrc);
       } else {
+        assert(code == Bytecodes::_i2f, "fcfid+frsp needs fixup code to avoid rounding incompatibility");
         __ fcfid(rdst, rsrc);
         __ frsp(rdst, rdst);
       }
       break;
     }
-    case Bytecodes::_l2f: { // >= Power7
-      assert(VM_Version::has_fcfids(), "fcfid+frsp needs fixup code to avoid rounding incompatibility");
-      __ fcfids(dst->as_float_reg(), src->as_double_reg()); // via mem
-      break;
-    }
     case Bytecodes::_f2d: {
       __ fmr_if_needed(dst->as_double_reg(), src->as_float_reg());
       break;
@@ -543,31 +566,49 @@
     }
     case Bytecodes::_d2i:
     case Bytecodes::_f2i: {
+      bool dst_in_memory = !VM_Version::has_mtfprd();
       FloatRegister rsrc = (code == Bytecodes::_d2i) ? src->as_double_reg() : src->as_float_reg();
-      Address       addr = frame_map()->address_for_slot(dst->double_stack_ix());
+      Address       addr = dst_in_memory ? frame_map()->address_for_slot(dst->double_stack_ix()) : NULL;
       Label L;
       // Result must be 0 if value is NaN; test by comparing value to itself.
       __ fcmpu(CCR0, rsrc, rsrc);
-      __ li(R0, 0); // 0 in case of NAN
-      __ std(R0, addr.disp(), addr.base());
+      if (dst_in_memory) {
+        __ li(R0, 0); // 0 in case of NAN
+        __ std(R0, addr.disp(), addr.base());
+      } else {
+        __ li(dst->as_register(), 0);
+      }
       __ bso(CCR0, L);
       __ fctiwz(rsrc, rsrc); // USE_KILL
-      __ stfd(rsrc, addr.disp(), addr.base());
+      if (dst_in_memory) {
+        __ stfd(rsrc, addr.disp(), addr.base());
+      } else {
+        __ mffprd(dst->as_register(), rsrc);
+      }
       __ bind(L);
       break;
     }
     case Bytecodes::_d2l:
     case Bytecodes::_f2l: {
+      bool dst_in_memory = !VM_Version::has_mtfprd();
       FloatRegister rsrc = (code == Bytecodes::_d2l) ? src->as_double_reg() : src->as_float_reg();
-      Address       addr = frame_map()->address_for_slot(dst->double_stack_ix());
+      Address       addr = dst_in_memory ? frame_map()->address_for_slot(dst->double_stack_ix()) : NULL;
       Label L;
       // Result must be 0 if value is NaN; test by comparing value to itself.
       __ fcmpu(CCR0, rsrc, rsrc);
-      __ li(R0, 0); // 0 in case of NAN
-      __ std(R0, addr.disp(), addr.base());
+      if (dst_in_memory) {
+        __ li(R0, 0); // 0 in case of NAN
+        __ std(R0, addr.disp(), addr.base());
+      } else {
+        __ li(dst->as_register_lo(), 0);
+      }
       __ bso(CCR0, L);
       __ fctidz(rsrc, rsrc); // USE_KILL
-      __ stfd(rsrc, addr.disp(), addr.base());
+      if (dst_in_memory) {
+        __ stfd(rsrc, addr.disp(), addr.base());
+      } else {
+        __ mffprd(dst->as_register_lo(), rsrc);
+      }
       __ bind(L);
       break;
     }
--- a/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Tue Jul 04 15:11:25 2017 +0200
@@ -871,81 +871,91 @@
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
 void LIRGenerator::do_Convert(Convert* x) {
-  switch (x->op()) {
+  if (!VM_Version::has_mtfprd()) {
+    switch (x->op()) {
+
+      // int -> float: force spill
+      case Bytecodes::_l2f: {
+        if (!VM_Version::has_fcfids()) { // fcfids is >= Power7 only
+          // fcfid+frsp needs fixup code to avoid rounding incompatibility.
+          address entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2f);
+          LIR_Opr result = call_runtime(x->value(), entry, x->type(), NULL);
+          set_result(x, result);
+          return;
+        } // else fallthru
+      }
+      case Bytecodes::_l2d: {
+        LIRItem value(x->value(), this);
+        LIR_Opr reg = rlock_result(x);
+        value.load_item();
+        LIR_Opr tmp = force_to_spill(value.result(), T_DOUBLE);
+        __ convert(x->op(), tmp, reg);
+        return;
+      }
+      case Bytecodes::_i2f:
+      case Bytecodes::_i2d: {
+        LIRItem value(x->value(), this);
+        LIR_Opr reg = rlock_result(x);
+        value.load_item();
+        // Convert i2l first.
+        LIR_Opr tmp1 = new_register(T_LONG);
+        __ convert(Bytecodes::_i2l, value.result(), tmp1);
+        LIR_Opr tmp2 = force_to_spill(tmp1, T_DOUBLE);
+        __ convert(x->op(), tmp2, reg);
+        return;
+      }
 
-    // int -> float: force spill
-    case Bytecodes::_l2f: {
-      if (!VM_Version::has_fcfids()) { // fcfids is >= Power7 only
-        // fcfid+frsp needs fixup code to avoid rounding incompatibility.
-        address entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2f);
-        LIR_Opr result = call_runtime(x->value(), entry, x->type(), NULL);
-        set_result(x, result);
+      // float -> int: result will be stored
+      case Bytecodes::_f2l:
+      case Bytecodes::_d2l: {
+        LIRItem value(x->value(), this);
+        LIR_Opr reg = rlock_result(x);
+        value.set_destroys_register(); // USE_KILL
+        value.load_item();
+        set_vreg_flag(reg, must_start_in_memory);
+        __ convert(x->op(), value.result(), reg);
+        return;
+      }
+      case Bytecodes::_f2i:
+      case Bytecodes::_d2i: {
+        LIRItem value(x->value(), this);
+        LIR_Opr reg = rlock_result(x);
+        value.set_destroys_register(); // USE_KILL
+        value.load_item();
+        // Convert l2i afterwards.
+        LIR_Opr tmp1 = new_register(T_LONG);
+        set_vreg_flag(tmp1, must_start_in_memory);
+        __ convert(x->op(), value.result(), tmp1);
+        __ convert(Bytecodes::_l2i, tmp1, reg);
+        return;
+      }
+
+      // Within same category: just register conversions.
+      case Bytecodes::_i2b:
+      case Bytecodes::_i2c:
+      case Bytecodes::_i2s:
+      case Bytecodes::_i2l:
+      case Bytecodes::_l2i:
+      case Bytecodes::_f2d:
+      case Bytecodes::_d2f:
         break;
-      } // else fallthru
+
+      default: ShouldNotReachHere();
     }
-    case Bytecodes::_l2d: {
-      LIRItem value(x->value(), this);
-      LIR_Opr reg = rlock_result(x);
-      value.load_item();
-      LIR_Opr tmp = force_to_spill(value.result(), T_DOUBLE);
-      __ convert(x->op(), tmp, reg);
-      break;
-    }
-    case Bytecodes::_i2f:
-    case Bytecodes::_i2d: {
-      LIRItem value(x->value(), this);
-      LIR_Opr reg = rlock_result(x);
-      value.load_item();
-      // Convert i2l first.
-      LIR_Opr tmp1 = new_register(T_LONG);
-      __ convert(Bytecodes::_i2l, value.result(), tmp1);
-      LIR_Opr tmp2 = force_to_spill(tmp1, T_DOUBLE);
-      __ convert(x->op(), tmp2, reg);
-      break;
-    }
+  }
 
-    // float -> int: result will be stored
+  // Register conversion.
+  LIRItem value(x->value(), this);
+  LIR_Opr reg = rlock_result(x);
+  value.load_item();
+  switch (x->op()) {
     case Bytecodes::_f2l:
-    case Bytecodes::_d2l: {
-      LIRItem value(x->value(), this);
-      LIR_Opr reg = rlock_result(x);
-      value.set_destroys_register(); // USE_KILL
-      value.load_item();
-      set_vreg_flag(reg, must_start_in_memory);
-      __ convert(x->op(), value.result(), reg);
-      break;
-    }
+    case Bytecodes::_d2l:
     case Bytecodes::_f2i:
-    case Bytecodes::_d2i: {
-      LIRItem value(x->value(), this);
-      LIR_Opr reg = rlock_result(x);
-      value.set_destroys_register(); // USE_KILL
-      value.load_item();
-      // Convert l2i afterwards.
-      LIR_Opr tmp1 = new_register(T_LONG);
-      set_vreg_flag(tmp1, must_start_in_memory);
-      __ convert(x->op(), value.result(), tmp1);
-      __ convert(Bytecodes::_l2i, tmp1, reg);
-      break;
-    }
-
-    // Within same category: just register conversions.
-    case Bytecodes::_i2b:
-    case Bytecodes::_i2c:
-    case Bytecodes::_i2s:
-    case Bytecodes::_i2l:
-    case Bytecodes::_l2i:
-    case Bytecodes::_f2d:
-    case Bytecodes::_d2f: {
-      LIRItem value(x->value(), this);
-      LIR_Opr reg = rlock_result(x);
-      value.load_item();
-      __ convert(x->op(), value.result(), reg);
-      break;
-    }
-
-    default: ShouldNotReachHere();
+    case Bytecodes::_d2i: value.set_destroys_register(); break; // USE_KILL
+    default: break;
   }
+  __ convert(x->op(), value.result(), reg);
 }
 
 
--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc.hpp	Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc.hpp	Tue Jul 04 15:11:25 2017 +0200
@@ -99,8 +99,8 @@
 
   void push_2ptrs(Register first, Register second);
 
-  void push_l_pop_d(Register l = R17_tos, FloatRegister d = F15_ftos);
-  void push_d_pop_l(FloatRegister d = F15_ftos, Register l = R17_tos);
+  void move_l_to_d(Register l = R17_tos, FloatRegister d = F15_ftos);
+  void move_d_to_l(FloatRegister d = F15_ftos, Register l = R17_tos);
 
   void pop (TosState state);           // transition vtos -> state
   void push(TosState state);           // transition state -> vtos
--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Tue Jul 04 15:11:25 2017 +0200
@@ -284,14 +284,22 @@
   addi(R15_esp, R15_esp, - 2 * Interpreter::stackElementSize );
 }
 
-void InterpreterMacroAssembler::push_l_pop_d(Register l, FloatRegister d) {
-  std(l, 0, R15_esp);
-  lfd(d, 0, R15_esp);
+void InterpreterMacroAssembler::move_l_to_d(Register l, FloatRegister d) {
+  if (VM_Version::has_mtfprd()) {
+    mtfprd(d, l);
+  } else {
+    std(l, 0, R15_esp);
+    lfd(d, 0, R15_esp);
+  }
 }
 
-void InterpreterMacroAssembler::push_d_pop_l(FloatRegister d, Register l) {
-  stfd(d, 0, R15_esp);
-  ld(l, 0, R15_esp);
+void InterpreterMacroAssembler::move_d_to_l(FloatRegister d, Register l) {
+  if (VM_Version::has_mtfprd()) {
+    mffprd(l, d);
+  } else {
+    stfd(d, 0, R15_esp);
+    ld(l, 0, R15_esp);
+  }
 }
 
 void InterpreterMacroAssembler::push(TosState state) {
--- a/hotspot/src/cpu/ppc/vm/ppc.ad	Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad	Tue Jul 04 15:11:25 2017 +0200
@@ -3079,6 +3079,17 @@
     __ bind(done);
   %}
 
+  enc_class enc_cmove_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+
+    MacroAssembler _masm(&cbuf);
+    Label done;
+    __ bso($crx$$CondRegister, done);
+    __ mffprd($dst$$Register, $src$$FloatRegister);
+    // TODO PPC port __ endgroup_if_needed(_size == 12);
+    __ bind(done);
+  %}
+
   enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_bc);
 
@@ -10126,9 +10137,36 @@
 //   float intBitsToFloat(int bits)
 //
 // Notes on the implementation on ppc64:
-// We only provide rules which move between a register and a stack-location,
-// because we always have to go through memory when moving between a float
-// register and an integer register.
+// For Power7 and earlier, the rules are limited to those which move between a
+// register and a stack-location, because we always have to go through memory
+// when moving between a float register and an integer register.
+// This restriction is removed in Power8 with the introduction of the mtfprd
+// and mffprd instructions.
+
+instruct moveL2D_reg(regD dst, iRegLsrc src) %{
+  match(Set dst (MoveL2D src));
+  predicate(VM_Version::has_mtfprd());
+
+  format %{ "MTFPRD  $dst, $src" %}
+  size(4);
+  ins_encode %{
+    __ mtfprd($dst$$FloatRegister, $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct moveI2D_reg(regD dst, iRegIsrc src) %{
+  // no match-rule, false predicate
+  effect(DEF dst, USE src);
+  predicate(false);
+
+  format %{ "MTFPRWA $dst, $src" %}
+  size(4);
+  ins_encode %{
+    __ mtfprwa($dst$$FloatRegister, $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
 
 //---------- Chain stack slots between similar types --------
 
@@ -10641,6 +10679,20 @@
   ins_pipe(pipe_class_default);
 %}
 
+instruct cmovI_bso_reg(iRegIdst dst, flagsRegSrc crx, regD src) %{
+  // no match-rule, false predicate
+  effect(DEF dst, USE crx, USE src);
+  predicate(false);
+
+  ins_variable_size_depending_on_alignment(true);
+
+  format %{ "cmovI   $crx, $dst, $src" %}
+  // Worst case is branch + move + stop, no stop without scheduler.
+  size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
+  ins_encode( enc_cmove_bso_reg(dst, crx, src) );
+  ins_pipe(pipe_class_default);
+%}
+
 instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE mem);
@@ -10695,9 +10747,64 @@
   %}
 %}
 
+instruct cmovI_bso_reg_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, regD src) %{
+  // no match-rule, false predicate
+  effect(DEF dst, USE crx, USE src);
+  predicate(false);
+
+  format %{ "CmovI   $dst, $crx, $src \t// postalloc expanded" %}
+  postalloc_expand %{
+    //
+    // replaces
+    //
+    //   region  dst  crx  src
+    //    \       |    |   /
+    //     dst=cmovI_bso_reg_conLvalue0
+    //
+    // with
+    //
+    //   region  dst
+    //    \       /
+    //     dst=loadConI16(0)
+    //      |
+    //      ^  region  dst  crx  src
+    //      |   \       |    |    /
+    //      dst=cmovI_bso_reg
+    //
+
+    // Create new nodes.
+    MachNode *m1 = new loadConI16Node();
+    MachNode *m2 = new cmovI_bso_regNode();
+
+    // inputs for new nodes
+    m1->add_req(n_region);
+    m2->add_req(n_region, n_crx, n_src);
+
+    // precedences for new nodes
+    m2->add_prec(m1);
+
+    // operands for new nodes
+    m1->_opnds[0] = op_dst;
+    m1->_opnds[1] = new immI16Oper(0);
+
+    m2->_opnds[0] = op_dst;
+    m2->_opnds[1] = op_crx;
+    m2->_opnds[2] = op_src;
+
+    // registers for new nodes
+    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
+    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
+
+    // Insert new nodes.
+    nodes->push(m1);
+    nodes->push(m2);
+  %}
+%}
+
 // Double to Int conversion, NaN is mapped to 0.
 instruct convD2I_reg_ExEx(iRegIdst dst, regD src) %{
   match(Set dst (ConvD2I src));
+  predicate(!VM_Version::has_mtfprd());
   ins_cost(DEFAULT_COST);
 
   expand %{
@@ -10711,6 +10818,21 @@
   %}
 %}
 
+// Double to Int conversion, NaN is mapped to 0. Special version for Power8.
+instruct convD2I_reg_mffprd_ExEx(iRegIdst dst, regD src) %{
+  match(Set dst (ConvD2I src));
+  predicate(VM_Version::has_mtfprd());
+  ins_cost(DEFAULT_COST);
+
+  expand %{
+    regD tmpD;
+    flagsReg crx;
+    cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
+    convD2IRaw_regD(tmpD, src);                         // Convert double to int (speculated).
+    cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpD);        // Cmove based on NaN check.
+  %}
+%}
+
 instruct convF2IRaw_regF(regF dst, regF src) %{
   // no match-rule, false predicate
   effect(DEF dst, USE src);
@@ -10728,6 +10850,7 @@
 // Float to Int conversion, NaN is mapped to 0.
 instruct convF2I_regF_ExEx(iRegIdst dst, regF src) %{
   match(Set dst (ConvF2I src));
+  predicate(!VM_Version::has_mtfprd());
   ins_cost(DEFAULT_COST);
 
   expand %{
@@ -10741,6 +10864,21 @@
   %}
 %}
 
+// Float to Int conversion, NaN is mapped to 0. Special version for Power8.
+instruct convF2I_regF_mffprd_ExEx(iRegIdst dst, regF src) %{
+  match(Set dst (ConvF2I src));
+  predicate(VM_Version::has_mtfprd());
+  ins_cost(DEFAULT_COST);
+
+  expand %{
+    regF tmpF;
+    flagsReg crx;
+    cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
+    convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
+    cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpF);        // Cmove based on NaN check.
+  %}
+%}
+
 // Convert to Long
 
 instruct convI2L_reg(iRegLdst dst, iRegIsrc src) %{
@@ -10810,6 +10948,20 @@
   ins_pipe(pipe_class_default);
 %}
 
+instruct cmovL_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
+  // no match-rule, false predicate
+  effect(DEF dst, USE crx, USE src);
+  predicate(false);
+
+  ins_variable_size_depending_on_alignment(true);
+
+  format %{ "cmovL   $crx, $dst, $src" %}
+  // Worst case is branch + move + stop, no stop without scheduler.
+  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
+  ins_encode( enc_cmove_bso_reg(dst, crx, src) );
+  ins_pipe(pipe_class_default);
+%}
+
 instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE mem);
@@ -10861,9 +11013,61 @@
   %}
 %}
 
+instruct cmovL_bso_reg_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, regD src) %{
+  // no match-rule, false predicate
+  effect(DEF dst, USE crx, USE src);
+  predicate(false);
+
+  format %{ "CmovL   $dst, $crx, $src \t// postalloc expanded" %}
+  postalloc_expand %{
+    //
+    // replaces
+    //
+    //   region  dst  crx  src
+    //    \       |    |   /
+    //     dst=cmovL_bso_reg_conLvalue0
+    //
+    // with
+    //
+    //   region  dst
+    //    \       /
+    //     dst=loadConL16(0)
+    //      |
+    //      ^  region  dst  crx  src
+    //      |   \       |    |    /
+    //      dst=cmovL_bso_reg
+    //
+
+    // Create new nodes.
+    MachNode *m1 = new loadConL16Node();
+    MachNode *m2 = new cmovL_bso_regNode();
+
+    // inputs for new nodes
+    m1->add_req(n_region);
+    m2->add_req(n_region, n_crx, n_src);
+    m2->add_prec(m1);
+
+    // operands for new nodes
+    m1->_opnds[0] = op_dst;
+    m1->_opnds[1] = new immL16Oper(0);
+    m2->_opnds[0] = op_dst;
+    m2->_opnds[1] = op_crx;
+    m2->_opnds[2] = op_src;
+
+    // registers for new nodes
+    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
+    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
+
+    // Insert new nodes.
+    nodes->push(m1);
+    nodes->push(m2);
+  %}
+%}
+
 // Float to Long conversion, NaN is mapped to 0.
 instruct convF2L_reg_ExEx(iRegLdst dst, regF src) %{
   match(Set dst (ConvF2L src));
+  predicate(!VM_Version::has_mtfprd());
   ins_cost(DEFAULT_COST);
 
   expand %{
@@ -10877,6 +11081,21 @@
   %}
 %}
 
+// Float to Long conversion, NaN is mapped to 0. Special version for Power8.
+instruct convF2L_reg_mffprd_ExEx(iRegLdst dst, regF src) %{
+  match(Set dst (ConvF2L src));
+  predicate(VM_Version::has_mtfprd());
+  ins_cost(DEFAULT_COST);
+
+  expand %{
+    regF tmpF;
+    flagsReg crx;
+    cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
+    convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
+    cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpF);        // Cmove based on NaN check.
+  %}
+%}
+
 instruct convD2LRaw_regD(regD dst, regD src) %{
   // no match-rule, false predicate
   effect(DEF dst, USE src);
@@ -10894,6 +11113,7 @@
 // Double to Long conversion, NaN is mapped to 0.
 instruct convD2L_reg_ExEx(iRegLdst dst, regD src) %{
   match(Set dst (ConvD2L src));
+  predicate(!VM_Version::has_mtfprd());
   ins_cost(DEFAULT_COST);
 
   expand %{
@@ -10907,6 +11127,21 @@
   %}
 %}
 
+// Double to Long conversion, NaN is mapped to 0. Special version for Power8.
+instruct convD2L_reg_mffprd_ExEx(iRegLdst dst, regD src) %{
+  match(Set dst (ConvD2L src));
+  predicate(VM_Version::has_mtfprd());
+  ins_cost(DEFAULT_COST);
+
+  expand %{
+    regD tmpD;
+    flagsReg crx;
+    cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
+    convD2LRaw_regD(tmpD, src);                         // Convert double to long (speculated).
+    cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpD);        // Cmove based on NaN check.
+  %}
+%}
+
 // Convert to Float
 
 // Placed here as needed in expand.
@@ -10972,7 +11207,7 @@
 // Integer to Float conversion. Special version for Power7.
 instruct convI2F_ireg_fcfids_Ex(regF dst, iRegIsrc src) %{
   match(Set dst (ConvI2F src));
-  predicate(VM_Version::has_fcfids());
+  predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd());
   ins_cost(DEFAULT_COST);
 
   expand %{
@@ -10986,10 +11221,23 @@
   %}
 %}
 
+// Integer to Float conversion. Special version for Power8.
+instruct convI2F_ireg_mtfprd_Ex(regF dst, iRegIsrc src) %{
+  match(Set dst (ConvI2F src));
+  predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd());
+  ins_cost(DEFAULT_COST);
+
+  expand %{
+    regD tmpD;
+    moveI2D_reg(tmpD, src);
+    convL2FRaw_regF(dst, tmpD);          // Convert to float.
+  %}
+%}
+
 // L2F to avoid runtime call.
 instruct convL2F_ireg_fcfids_Ex(regF dst, iRegLsrc src) %{
   match(Set dst (ConvL2F src));
-  predicate(VM_Version::has_fcfids());
+  predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd());
   ins_cost(DEFAULT_COST);
 
   expand %{
@@ -11001,6 +11249,19 @@
   %}
 %}
 
+// L2F to avoid runtime call.  Special version for Power8.
+instruct convL2F_ireg_mtfprd_Ex(regF dst, iRegLsrc src) %{
+  match(Set dst (ConvL2F src));
+  predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd());
+  ins_cost(DEFAULT_COST);
+
+  expand %{
+    regD tmpD;
+    moveL2D_reg(tmpD, src);
+    convL2FRaw_regF(dst, tmpD);          // Convert to float.
+  %}
+%}
+
 // Moved up as used in expand.
 //instruct convD2F_reg(regF dst, regD src) %{%}
 
@@ -11009,6 +11270,7 @@
 // Integer to Double conversion.
 instruct convI2D_reg_Ex(regD dst, iRegIsrc src) %{
   match(Set dst (ConvI2D src));
+  predicate(!VM_Version::has_mtfprd());
   ins_cost(DEFAULT_COST);
 
   expand %{
@@ -11022,6 +11284,19 @@
   %}
 %}
 
+// Integer to Double conversion. Special version for Power8.
+instruct convI2D_reg_mtfprd_Ex(regD dst, iRegIsrc src) %{
+  match(Set dst (ConvI2D src));
+  predicate(VM_Version::has_mtfprd());
+  ins_cost(DEFAULT_COST);
+
+  expand %{
+    regD tmpD;
+    moveI2D_reg(tmpD, src);
+    convL2DRaw_regD(dst, tmpD);          // Convert to double.
+  %}
+%}
+
 // Long to Double conversion
 instruct convL2D_reg_Ex(regD dst, stackSlotL src) %{
   match(Set dst (ConvL2D src));
@@ -11034,6 +11309,19 @@
   %}
 %}
 
+// Long to Double conversion. Special version for Power8.
+instruct convL2D_reg_mtfprd_Ex(regD dst, iRegLsrc src) %{
+  match(Set dst (ConvL2D src));
+  predicate(VM_Version::has_mtfprd());
+  ins_cost(DEFAULT_COST);
+
+  expand %{
+    regD tmpD;
+    moveL2D_reg(tmpD, src);
+    convL2DRaw_regD(dst, tmpD);          // Convert to double.
+  %}
+%}
+
 instruct convF2D_reg(regD dst, regF src) %{
   match(Set dst (ConvF2D src));
   format %{ "FMR     $dst, $src \t// float->double" %}
--- a/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp	Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp	Tue Jul 04 15:11:25 2017 +0200
@@ -1472,13 +1472,13 @@
     case Bytecodes::_i2d:
       __ extsw(R17_tos, R17_tos);
     case Bytecodes::_l2d:
-      __ push_l_pop_d();
+      __ move_l_to_d();
       __ fcfid(F15_ftos, F15_ftos);
       break;
 
     case Bytecodes::_i2f:
       __ extsw(R17_tos, R17_tos);
-      __ push_l_pop_d();
+      __ move_l_to_d();
       if (VM_Version::has_fcfids()) { // fcfids is >= Power7 only
         // Comment: alternatively, load with sign extend could be done by lfiwax.
         __ fcfids(F15_ftos, F15_ftos);
@@ -1490,7 +1490,7 @@
 
     case Bytecodes::_l2f:
       if (VM_Version::has_fcfids()) { // fcfids is >= Power7 only
-        __ push_l_pop_d();
+        __ move_l_to_d();
         __ fcfids(F15_ftos, F15_ftos);
       } else {
         // Avoid rounding problem when result should be 0x3f800001: need fixup code before fcfid+frsp.
@@ -1514,7 +1514,7 @@
       __ li(R17_tos, 0); // 0 in case of NAN
       __ bso(CCR0, done);
       __ fctiwz(F15_ftos, F15_ftos);
-      __ push_d_pop_l();
+      __ move_d_to_l();
       break;
 
     case Bytecodes::_d2l:
@@ -1523,7 +1523,7 @@
       __ li(R17_tos, 0); // 0 in case of NAN
       __ bso(CCR0, done);
       __ fctidz(F15_ftos, F15_ftos);
-      __ push_d_pop_l();
+      __ move_d_to_l();
       break;
 
     default: ShouldNotReachHere();
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp	Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp	Tue Jul 04 15:11:25 2017 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -106,6 +106,7 @@
   static bool has_vsx()     { return (_features & vsx_m) != 0; }
   static bool has_ldbrx()   { return (_features & ldbrx_m) != 0; }
   static bool has_stdbrx()  { return (_features & stdbrx_m) != 0; }
+  static bool has_mtfprd()  { return has_vpmsumb(); } // alias for P8
 
   // Assembler testing
   static void allow_all();