8181809: PPC64: Leverage mtfprd/mffprd on POWER8
Reviewed-by: mdoerr, simonis
Contributed-by: Matthew Brandyberry <mbrandy@linux.vnet.ibm.com>
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp Tue Jul 04 15:11:25 2017 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -511,6 +511,7 @@
STXVD2X_OPCODE = (31u << OPCODE_SHIFT | 972u << 1),
MTVSRD_OPCODE = (31u << OPCODE_SHIFT | 179u << 1),
MFVSRD_OPCODE = (31u << OPCODE_SHIFT | 51u << 1),
+ MTVSRWA_OPCODE = (31u << OPCODE_SHIFT | 211u << 1),
// Vector Permute and Formatting
VPKPX_OPCODE = (4u << OPCODE_SHIFT | 782u ),
@@ -2138,6 +2139,11 @@
inline void mtvrd( VectorRegister d, Register a);
inline void mfvrd( Register a, VectorRegister d);
+ // Vector-Scalar (VSX) instructions.
+ inline void mtfprd( FloatRegister d, Register a);
+ inline void mtfprwa( FloatRegister d, Register a);
+ inline void mffprd( Register a, FloatRegister d);
+
// AES (introduced with Power 8)
inline void vcipher( VectorRegister d, VectorRegister a, VectorRegister b);
inline void vcipherlast( VectorRegister d, VectorRegister a, VectorRegister b);
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp Tue Jul 04 15:11:25 2017 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -765,6 +765,11 @@
inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vrt(d) | ra(a) | 1u); } // 1u: d is treated as Vector (VMX/Altivec).
inline void Assembler::mfvrd( Register a, VectorRegister d) { emit_int32( MFVSRD_OPCODE | vrt(d) | ra(a) | 1u); } // 1u: d is treated as Vector (VMX/Altivec).
+// Vector-Scalar (VSX) instructions. Same MTVSRD/MFVSRD opcodes as mtvrd/mfvrd, but the register goes in via frt() and the low 1u bit is left clear.
+inline void Assembler::mtfprd( FloatRegister d, Register a) { emit_int32( MTVSRD_OPCODE | frt(d) | ra(a)); } // GPR a -> FPR d
+inline void Assembler::mtfprwa( FloatRegister d, Register a) { emit_int32( MTVSRWA_OPCODE | frt(d) | ra(a)); } // GPR a -> FPR d; used for int sources (presumably sign-extends the word - confirm against the ISA)
+inline void Assembler::mffprd( Register a, FloatRegister d) { emit_int32( MFVSRD_OPCODE | frt(d) | ra(a)); } // FPR d -> GPR a
+
inline void Assembler::vpkpx( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKPX_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vpkshss( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSHSS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
inline void Assembler::vpkswss( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSWSS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
--- a/hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp Tue Jul 04 15:11:25 2017 +0200
@@ -514,25 +514,48 @@
}
case Bytecodes::_i2d:
case Bytecodes::_l2d: {
- __ fcfid(dst->as_double_reg(), src->as_double_reg()); // via mem
+ bool src_in_memory = !VM_Version::has_mtfprd(); // without mtfprd, src is read below as a double register ("via mem")
+ FloatRegister rdst = dst->as_double_reg();
+ FloatRegister rsrc;
+ if (src_in_memory) {
+ rsrc = src->as_double_reg(); // via mem
+ } else {
+ // move src into the dst FPR, which then doubles as the fcfid input
+ if (code == Bytecodes::_i2d) {
+ __ mtfprwa(rdst, src->as_register()); // int source
+ } else {
+ __ mtfprd(rdst, src->as_register_lo()); // long source
+ }
+ rsrc = rdst;
+ }
+ __ fcfid(rdst, rsrc);
break;
}
- case Bytecodes::_i2f: {
+ case Bytecodes::_i2f:
+ case Bytecodes::_l2f: {
+ bool src_in_memory = !VM_Version::has_mtfprd(); // without mtfprd, src is read below as a double register ("via mem")
FloatRegister rdst = dst->as_float_reg();
- FloatRegister rsrc = src->as_double_reg(); // via mem
+ FloatRegister rsrc;
+ if (src_in_memory) {
+ rsrc = src->as_double_reg(); // via mem
+ } else {
+ // move src into the dst FPR, which then doubles as the conversion input
+ if (code == Bytecodes::_i2f) {
+ __ mtfprwa(rdst, src->as_register()); // int source
+ } else {
+ __ mtfprd(rdst, src->as_register_lo()); // long source
+ }
+ rsrc = rdst;
+ }
if (VM_Version::has_fcfids()) {
__ fcfids(rdst, rsrc);
} else {
+ assert(code == Bytecodes::_i2f, "fcfid+frsp needs fixup code to avoid rounding incompatibility"); // l2f must never reach the fcfid+frsp fallback
__ fcfid(rdst, rsrc);
__ frsp(rdst, rdst);
}
break;
}
- case Bytecodes::_l2f: { // >= Power7
- assert(VM_Version::has_fcfids(), "fcfid+frsp needs fixup code to avoid rounding incompatibility");
- __ fcfids(dst->as_float_reg(), src->as_double_reg()); // via mem
- break;
- }
case Bytecodes::_f2d: {
__ fmr_if_needed(dst->as_double_reg(), src->as_float_reg());
break;
@@ -543,31 +566,49 @@
}
case Bytecodes::_d2i:
case Bytecodes::_f2i: {
+ bool dst_in_memory = !VM_Version::has_mtfprd(); // without mffprd the result is written to dst's stack slot
FloatRegister rsrc = (code == Bytecodes::_d2i) ? src->as_double_reg() : src->as_float_reg();
- Address addr = frame_map()->address_for_slot(dst->double_stack_ix());
+ Address addr = dst_in_memory ? frame_map()->address_for_slot(dst->double_stack_ix()) : NULL; // slot is looked up and used only on the in-memory path
Label L;
// Result must be 0 if value is NaN; test by comparing value to itself.
__ fcmpu(CCR0, rsrc, rsrc);
- __ li(R0, 0); // 0 in case of NAN
- __ std(R0, addr.disp(), addr.base());
+ if (dst_in_memory) {
+ __ li(R0, 0); // 0 in case of NAN
+ __ std(R0, addr.disp(), addr.base());
+ } else {
+ __ li(dst->as_register(), 0); // 0 in case of NAN, placed directly in the result register
+ }
__ bso(CCR0, L);
__ fctiwz(rsrc, rsrc); // USE_KILL
- __ stfd(rsrc, addr.disp(), addr.base());
+ if (dst_in_memory) {
+ __ stfd(rsrc, addr.disp(), addr.base());
+ } else {
+ __ mffprd(dst->as_register(), rsrc); // not executed when the bso above branches to L. NOTE(review): the non-mtfprd LIR path adds an l2i after this op; confirm none is needed here
+ }
__ bind(L);
break;
}
case Bytecodes::_d2l:
case Bytecodes::_f2l: {
+ bool dst_in_memory = !VM_Version::has_mtfprd(); // without mffprd the result is written to dst's stack slot
FloatRegister rsrc = (code == Bytecodes::_d2l) ? src->as_double_reg() : src->as_float_reg();
- Address addr = frame_map()->address_for_slot(dst->double_stack_ix());
+ Address addr = dst_in_memory ? frame_map()->address_for_slot(dst->double_stack_ix()) : NULL; // slot is looked up and used only on the in-memory path
Label L;
// Result must be 0 if value is NaN; test by comparing value to itself.
__ fcmpu(CCR0, rsrc, rsrc);
- __ li(R0, 0); // 0 in case of NAN
- __ std(R0, addr.disp(), addr.base());
+ if (dst_in_memory) {
+ __ li(R0, 0); // 0 in case of NAN
+ __ std(R0, addr.disp(), addr.base());
+ } else {
+ __ li(dst->as_register_lo(), 0); // 0 in case of NAN, placed directly in the result register
+ }
__ bso(CCR0, L);
__ fctidz(rsrc, rsrc); // USE_KILL
- __ stfd(rsrc, addr.disp(), addr.base());
+ if (dst_in_memory) {
+ __ stfd(rsrc, addr.disp(), addr.base());
+ } else {
+ __ mffprd(dst->as_register_lo(), rsrc); // not executed when the bso above branches to L
+ }
__ bind(L);
break;
}
--- a/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp Tue Jul 04 15:11:25 2017 +0200
@@ -871,81 +871,91 @@
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
// _i2b, _i2c, _i2s
void LIRGenerator::do_Convert(Convert* x) {
- switch (x->op()) {
+ if (!VM_Version::has_mtfprd()) { // no direct GPR<->FPR move: int<->fp conversions below go through a spill / stack slot
+ switch (x->op()) {
+
+ // int -> float: force spill
+ case Bytecodes::_l2f: {
+ if (!VM_Version::has_fcfids()) { // fcfids is >= Power7 only
+ // fcfid+frsp needs fixup code to avoid rounding incompatibility.
+ address entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2f);
+ LIR_Opr result = call_runtime(x->value(), entry, x->type(), NULL);
+ set_result(x, result);
+ return;
+ } // else fallthru
+ }
+ case Bytecodes::_l2d: {
+ LIRItem value(x->value(), this);
+ LIR_Opr reg = rlock_result(x);
+ value.load_item();
+ LIR_Opr tmp = force_to_spill(value.result(), T_DOUBLE);
+ __ convert(x->op(), tmp, reg);
+ return;
+ }
+ case Bytecodes::_i2f:
+ case Bytecodes::_i2d: {
+ LIRItem value(x->value(), this);
+ LIR_Opr reg = rlock_result(x);
+ value.load_item();
+ // Convert i2l first.
+ LIR_Opr tmp1 = new_register(T_LONG);
+ __ convert(Bytecodes::_i2l, value.result(), tmp1);
+ LIR_Opr tmp2 = force_to_spill(tmp1, T_DOUBLE);
+ __ convert(x->op(), tmp2, reg);
+ return;
+ }
- // int -> float: force spill
- case Bytecodes::_l2f: {
- if (!VM_Version::has_fcfids()) { // fcfids is >= Power7 only
- // fcfid+frsp needs fixup code to avoid rounding incompatibility.
- address entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2f);
- LIR_Opr result = call_runtime(x->value(), entry, x->type(), NULL);
- set_result(x, result);
+ // float -> int: result will be stored
+ case Bytecodes::_f2l:
+ case Bytecodes::_d2l: {
+ LIRItem value(x->value(), this);
+ LIR_Opr reg = rlock_result(x);
+ value.set_destroys_register(); // USE_KILL
+ value.load_item();
+ set_vreg_flag(reg, must_start_in_memory);
+ __ convert(x->op(), value.result(), reg);
+ return;
+ }
+ case Bytecodes::_f2i:
+ case Bytecodes::_d2i: {
+ LIRItem value(x->value(), this);
+ LIR_Opr reg = rlock_result(x);
+ value.set_destroys_register(); // USE_KILL
+ value.load_item();
+ // Convert l2i afterwards.
+ LIR_Opr tmp1 = new_register(T_LONG);
+ set_vreg_flag(tmp1, must_start_in_memory);
+ __ convert(x->op(), value.result(), tmp1);
+ __ convert(Bytecodes::_l2i, tmp1, reg);
+ return;
+ }
+
+ // Within same category: just register conversions. These break out to the shared code after the switch.
+ case Bytecodes::_i2b:
+ case Bytecodes::_i2c:
+ case Bytecodes::_i2s:
+ case Bytecodes::_i2l:
+ case Bytecodes::_l2i:
+ case Bytecodes::_f2d:
+ case Bytecodes::_d2f:
break;
- } // else fallthru
+
+ default: ShouldNotReachHere();
}
- case Bytecodes::_l2d: {
- LIRItem value(x->value(), this);
- LIR_Opr reg = rlock_result(x);
- value.load_item();
- LIR_Opr tmp = force_to_spill(value.result(), T_DOUBLE);
- __ convert(x->op(), tmp, reg);
- break;
- }
- case Bytecodes::_i2f:
- case Bytecodes::_i2d: {
- LIRItem value(x->value(), this);
- LIR_Opr reg = rlock_result(x);
- value.load_item();
- // Convert i2l first.
- LIR_Opr tmp1 = new_register(T_LONG);
- __ convert(Bytecodes::_i2l, value.result(), tmp1);
- LIR_Opr tmp2 = force_to_spill(tmp1, T_DOUBLE);
- __ convert(x->op(), tmp2, reg);
- break;
- }
+ }
- // float -> int: result will be stored
+ // Register conversion: reached for every op when mtfprd is available, otherwise only for the same-category ops that break out above.
+ LIRItem value(x->value(), this);
+ LIR_Opr reg = rlock_result(x);
+ value.load_item();
+ switch (x->op()) {
case Bytecodes::_f2l:
- case Bytecodes::_d2l: {
- LIRItem value(x->value(), this);
- LIR_Opr reg = rlock_result(x);
- value.set_destroys_register(); // USE_KILL
- value.load_item();
- set_vreg_flag(reg, must_start_in_memory);
- __ convert(x->op(), value.result(), reg);
- break;
- }
+ case Bytecodes::_d2l:
case Bytecodes::_f2i:
- case Bytecodes::_d2i: {
- LIRItem value(x->value(), this);
- LIR_Opr reg = rlock_result(x);
- value.set_destroys_register(); // USE_KILL
- value.load_item();
- // Convert l2i afterwards.
- LIR_Opr tmp1 = new_register(T_LONG);
- set_vreg_flag(tmp1, must_start_in_memory);
- __ convert(x->op(), value.result(), tmp1);
- __ convert(Bytecodes::_l2i, tmp1, reg);
- break;
- }
-
- // Within same category: just register conversions.
- case Bytecodes::_i2b:
- case Bytecodes::_i2c:
- case Bytecodes::_i2s:
- case Bytecodes::_i2l:
- case Bytecodes::_l2i:
- case Bytecodes::_f2d:
- case Bytecodes::_d2f: {
- LIRItem value(x->value(), this);
- LIR_Opr reg = rlock_result(x);
- value.load_item();
- __ convert(x->op(), value.result(), reg);
- break;
- }
-
- default: ShouldNotReachHere();
+ case Bytecodes::_d2i: value.set_destroys_register(); break; // USE_KILL. NOTE(review): set after load_item() here, while the memory paths above set it before - confirm the ordering is harmless
+ default: break;
}
+ __ convert(x->op(), value.result(), reg);
}
--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc.hpp Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc.hpp Tue Jul 04 15:11:25 2017 +0200
@@ -99,8 +99,8 @@
void push_2ptrs(Register first, Register second);
- void push_l_pop_d(Register l = R17_tos, FloatRegister d = F15_ftos);
- void push_d_pop_l(FloatRegister d = F15_ftos, Register l = R17_tos);
+ void move_l_to_d(Register l = R17_tos, FloatRegister d = F15_ftos);
+ void move_d_to_l(FloatRegister d = F15_ftos, Register l = R17_tos);
void pop (TosState state); // transition vtos -> state
void push(TosState state); // transition state -> vtos
--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp Tue Jul 04 15:11:25 2017 +0200
@@ -284,14 +284,22 @@
addi(R15_esp, R15_esp, - 2 * Interpreter::stackElementSize );
}
-void InterpreterMacroAssembler::push_l_pop_d(Register l, FloatRegister d) {
- std(l, 0, R15_esp);
- lfd(d, 0, R15_esp);
+void InterpreterMacroAssembler::move_l_to_d(Register l, FloatRegister d) { // copy GPR l into FPR d
+ if (VM_Version::has_mtfprd()) {
+ mtfprd(d, l); // direct register-to-register move
+ } else { // fallback: store l at 0(R15_esp) and reload that slot into d
+ std(l, 0, R15_esp);
+ lfd(d, 0, R15_esp);
+ }
}
-void InterpreterMacroAssembler::push_d_pop_l(FloatRegister d, Register l) {
- stfd(d, 0, R15_esp);
- ld(l, 0, R15_esp);
+void InterpreterMacroAssembler::move_d_to_l(FloatRegister d, Register l) { // copy FPR d into GPR l
+ if (VM_Version::has_mtfprd()) {
+ mffprd(l, d); // direct register-to-register move
+ } else { // fallback: store d at 0(R15_esp) and reload that slot into l
+ stfd(d, 0, R15_esp);
+ ld(l, 0, R15_esp);
+ }
}
void InterpreterMacroAssembler::push(TosState state) {
--- a/hotspot/src/cpu/ppc/vm/ppc.ad Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad Tue Jul 04 15:11:25 2017 +0200
@@ -3079,6 +3079,17 @@
__ bind(done);
%}
+ enc_class enc_cmove_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+
+ MacroAssembler _masm(&cbuf);
+ Label done;
+ __ bso($crx$$CondRegister, done); // branch taken: skip the move, so dst keeps the value it already holds
+ __ mffprd($dst$$Register, $src$$FloatRegister); // otherwise copy the FPR into the GPR
+ // TODO PPC port __ endgroup_if_needed(_size == 12);
+ __ bind(done);
+ %}
+
enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
// TODO: PPC port $archOpcode(ppc64Opcode_bc);
@@ -10126,9 +10137,36 @@
// float intBitsToFloat(int bits)
//
// Notes on the implementation on ppc64:
-// We only provide rules which move between a register and a stack-location,
-// because we always have to go through memory when moving between a float
-// register and an integer register.
+// For Power7 and earlier, the rules are limited to those which move between a
+// register and a stack-location, because we always have to go through memory
+// when moving between a float register and an integer register.
+// This restriction is removed in Power8 with the introduction of the mtfprd
+// and mffprd instructions.
+
+instruct moveL2D_reg(regD dst, iRegLsrc src) %{
+ match(Set dst (MoveL2D src));
+ predicate(VM_Version::has_mtfprd());
+
+ format %{ "MTFPRD $dst, $src" %}
+ size(4);
+ ins_encode %{
+ __ mtfprd($dst$$FloatRegister, $src$$Register); // single instruction, matching size(4); register-to-register, no stack slot involved
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct moveI2D_reg(regD dst, iRegIsrc src) %{
+ // no match-rule, false predicate: only instantiated from expand rules (convI2F_ireg_mtfprd_Ex, convI2D_reg_mtfprd_Ex)
+ effect(DEF dst, USE src);
+ predicate(false);
+
+ format %{ "MTFPRWA $dst, $src" %}
+ size(4);
+ ins_encode %{
+ __ mtfprwa($dst$$FloatRegister, $src$$Register); // single instruction, matching size(4)
+ %}
+ ins_pipe(pipe_class_default);
+%}
//---------- Chain stack slots between similar types --------
@@ -10641,6 +10679,20 @@
ins_pipe(pipe_class_default);
%}
+instruct cmovI_bso_reg(iRegIdst dst, flagsRegSrc crx, regD src) %{
+ // no match-rule, false predicate: created by the postalloc expansion of cmovI_bso_reg_conLvalue0_Ex
+ effect(DEF dst, USE crx, USE src);
+ predicate(false);
+
+ ins_variable_size_depending_on_alignment(true);
+
+ format %{ "cmovI $crx, $dst, $src" %}
+ // Worst case is branch + move + stop, no stop without scheduler. The condition is hard-wired to false, so 8 (branch + move) is used.
+ size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
+ ins_encode( enc_cmove_bso_reg(dst, crx, src) );
+ ins_pipe(pipe_class_default);
+%}
+
instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{
// no match-rule, false predicate
effect(DEF dst, USE crx, USE mem);
@@ -10695,9 +10747,64 @@
%}
%}
+instruct cmovI_bso_reg_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, regD src) %{
+ // no match-rule, false predicate: only used from the mffprd-based ConvD2I/ConvF2I expand rules
+ effect(DEF dst, USE crx, USE src);
+ predicate(false);
+
+ format %{ "CmovI $dst, $crx, $src \t// postalloc expanded" %}
+ postalloc_expand %{
+ //
+ // replaces
+ //
+ // region dst crx src
+ // \ | | /
+ // dst=cmovI_bso_reg_conLvalue0
+ //
+ // with
+ //
+ // region dst
+ // \ /
+ // dst=loadConI16(0)
+ // |
+ // ^ region dst crx src
+ // | \ | | /
+ // dst=cmovI_bso_reg
+ //
+
+ // Create new nodes: m1 loads the constant 0, m2 is the conditional FPR->GPR move.
+ MachNode *m1 = new loadConI16Node();
+ MachNode *m2 = new cmovI_bso_regNode();
+
+ // inputs for new nodes
+ m1->add_req(n_region);
+ m2->add_req(n_region, n_crx, n_src);
+
+ // precedences for new nodes: precedence edge from m2 to m1 (the vertical arrow in the diagram above)
+ m2->add_prec(m1);
+
+ // operands for new nodes
+ m1->_opnds[0] = op_dst;
+ m1->_opnds[1] = new immI16Oper(0);
+
+ m2->_opnds[0] = op_dst;
+ m2->_opnds[1] = op_crx;
+ m2->_opnds[2] = op_src;
+
+ // registers for new nodes: both are assigned the register pair already allocated to this node
+ ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
+ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
+
+ // Insert new nodes.
+ nodes->push(m1);
+ nodes->push(m2);
+ %}
+%}
+
// Double to Int conversion, NaN is mapped to 0.
instruct convD2I_reg_ExEx(iRegIdst dst, regD src) %{
match(Set dst (ConvD2I src));
+ predicate(!VM_Version::has_mtfprd());
ins_cost(DEFAULT_COST);
expand %{
@@ -10711,6 +10818,21 @@
%}
%}
+// Double to Int conversion, NaN is mapped to 0. Special version for Power8.
+instruct convD2I_reg_mffprd_ExEx(iRegIdst dst, regD src) %{
+ match(Set dst (ConvD2I src));
+ predicate(VM_Version::has_mtfprd());
+ ins_cost(DEFAULT_COST);
+
+ expand %{
+ regD tmpD;
+ flagsReg crx;
+ cmpDUnordered_reg_reg(crx, src, src); // Check whether src is NaN.
+ convD2IRaw_regD(tmpD, src); // Convert double to int (speculated).
+ cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpD); // Cmove based on NaN check.
+ %}
+%}
+
instruct convF2IRaw_regF(regF dst, regF src) %{
// no match-rule, false predicate
effect(DEF dst, USE src);
@@ -10728,6 +10850,7 @@
// Float to Int conversion, NaN is mapped to 0.
instruct convF2I_regF_ExEx(iRegIdst dst, regF src) %{
match(Set dst (ConvF2I src));
+ predicate(!VM_Version::has_mtfprd());
ins_cost(DEFAULT_COST);
expand %{
@@ -10741,6 +10864,21 @@
%}
%}
+// Float to Int conversion, NaN is mapped to 0. Special version for Power8.
+instruct convF2I_regF_mffprd_ExEx(iRegIdst dst, regF src) %{
+ match(Set dst (ConvF2I src));
+ predicate(VM_Version::has_mtfprd());
+ ins_cost(DEFAULT_COST);
+
+ expand %{
+ regF tmpF;
+ flagsReg crx;
+ cmpFUnordered_reg_reg(crx, src, src); // Check whether src is NaN.
+ convF2IRaw_regF(tmpF, src); // Convert float to int (speculated).
+ cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpF); // Cmove based on NaN check. NOTE(review): tmpF is regF while the cmov rule declares regD src - confirm intended.
+ %}
+%}
+
// Convert to Long
instruct convI2L_reg(iRegLdst dst, iRegIsrc src) %{
@@ -10810,6 +10948,20 @@
ins_pipe(pipe_class_default);
%}
+instruct cmovL_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
+ // no match-rule, false predicate: created by the postalloc expansion of cmovL_bso_reg_conLvalue0_Ex
+ effect(DEF dst, USE crx, USE src);
+ predicate(false);
+
+ ins_variable_size_depending_on_alignment(true);
+
+ format %{ "cmovL $crx, $dst, $src" %}
+ // Worst case is branch + move + stop, no stop without scheduler. The condition is hard-wired to false, so 8 (branch + move) is used.
+ size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
+ ins_encode( enc_cmove_bso_reg(dst, crx, src) );
+ ins_pipe(pipe_class_default);
+%}
+
instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{
// no match-rule, false predicate
effect(DEF dst, USE crx, USE mem);
@@ -10861,9 +11013,61 @@
%}
%}
+instruct cmovL_bso_reg_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, regD src) %{
+ // no match-rule, false predicate: only used from the mffprd-based ConvF2L/ConvD2L expand rules
+ effect(DEF dst, USE crx, USE src);
+ predicate(false);
+
+ format %{ "CmovL $dst, $crx, $src \t// postalloc expanded" %}
+ postalloc_expand %{
+ //
+ // replaces
+ //
+ // region dst crx src
+ // \ | | /
+ // dst=cmovL_bso_reg_conLvalue0
+ //
+ // with
+ //
+ // region dst
+ // \ /
+ // dst=loadConL16(0)
+ // |
+ // ^ region dst crx src
+ // | \ | | /
+ // dst=cmovL_bso_reg
+ //
+
+ // Create new nodes: m1 loads the constant 0, m2 is the conditional FPR->GPR move.
+ MachNode *m1 = new loadConL16Node();
+ MachNode *m2 = new cmovL_bso_regNode();
+
+ // inputs for new nodes
+ m1->add_req(n_region);
+ m2->add_req(n_region, n_crx, n_src);
+ m2->add_prec(m1); // precedence edge from m2 to m1 (the vertical arrow in the diagram above)
+
+ // operands for new nodes
+ m1->_opnds[0] = op_dst;
+ m1->_opnds[1] = new immL16Oper(0);
+ m2->_opnds[0] = op_dst;
+ m2->_opnds[1] = op_crx;
+ m2->_opnds[2] = op_src;
+
+ // registers for new nodes: both are assigned the register pair already allocated to this node
+ ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
+ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
+
+ // Insert new nodes.
+ nodes->push(m1);
+ nodes->push(m2);
+ %}
+%}
+
// Float to Long conversion, NaN is mapped to 0.
instruct convF2L_reg_ExEx(iRegLdst dst, regF src) %{
match(Set dst (ConvF2L src));
+ predicate(!VM_Version::has_mtfprd());
ins_cost(DEFAULT_COST);
expand %{
@@ -10877,6 +11081,21 @@
%}
%}
+// Float to Long conversion, NaN is mapped to 0. Special version for Power8.
+instruct convF2L_reg_mffprd_ExEx(iRegLdst dst, regF src) %{
+ match(Set dst (ConvF2L src));
+ predicate(VM_Version::has_mtfprd());
+ ins_cost(DEFAULT_COST);
+
+ expand %{
+ regF tmpF;
+ flagsReg crx;
+ cmpFUnordered_reg_reg(crx, src, src); // Check whether src is NaN.
+ convF2LRaw_regF(tmpF, src); // Convert float to long (speculated).
+ cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpF); // Cmove based on NaN check. NOTE(review): tmpF is regF while the cmov rule declares regD src - confirm intended.
+ %}
+%}
+
instruct convD2LRaw_regD(regD dst, regD src) %{
// no match-rule, false predicate
effect(DEF dst, USE src);
@@ -10894,6 +11113,7 @@
// Double to Long conversion, NaN is mapped to 0.
instruct convD2L_reg_ExEx(iRegLdst dst, regD src) %{
match(Set dst (ConvD2L src));
+ predicate(!VM_Version::has_mtfprd());
ins_cost(DEFAULT_COST);
expand %{
@@ -10907,6 +11127,21 @@
%}
%}
+// Double to Long conversion, NaN is mapped to 0. Special version for Power8.
+instruct convD2L_reg_mffprd_ExEx(iRegLdst dst, regD src) %{
+ match(Set dst (ConvD2L src));
+ predicate(VM_Version::has_mtfprd());
+ ins_cost(DEFAULT_COST);
+
+ expand %{
+ regD tmpD;
+ flagsReg crx;
+ cmpDUnordered_reg_reg(crx, src, src); // Check whether src is NaN.
+ convD2LRaw_regD(tmpD, src); // Convert double to long (speculated).
+ cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpD); // Cmove based on NaN check.
+ %}
+%}
+
// Convert to Float
// Placed here as needed in expand.
@@ -10972,7 +11207,7 @@
// Integer to Float conversion. Special version for Power7.
instruct convI2F_ireg_fcfids_Ex(regF dst, iRegIsrc src) %{
match(Set dst (ConvI2F src));
- predicate(VM_Version::has_fcfids());
+ predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd());
ins_cost(DEFAULT_COST);
expand %{
@@ -10986,10 +11221,23 @@
%}
%}
+// Integer to Float conversion. Special version for Power8.
+instruct convI2F_ireg_mtfprd_Ex(regF dst, iRegIsrc src) %{
+ match(Set dst (ConvI2F src));
+ predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd());
+ ins_cost(DEFAULT_COST);
+
+ expand %{
+ regD tmpD;
+ moveI2D_reg(tmpD, src); // Move the int from the GPR into an FPR (mtfprwa).
+ convL2FRaw_regF(dst, tmpD); // Convert to float.
+ %}
+%}
+
// L2F to avoid runtime call.
instruct convL2F_ireg_fcfids_Ex(regF dst, iRegLsrc src) %{
match(Set dst (ConvL2F src));
- predicate(VM_Version::has_fcfids());
+ predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd());
ins_cost(DEFAULT_COST);
expand %{
@@ -11001,6 +11249,19 @@
%}
%}
+// L2F to avoid runtime call. Special version for Power8.
+instruct convL2F_ireg_mtfprd_Ex(regF dst, iRegLsrc src) %{
+ match(Set dst (ConvL2F src));
+ predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd());
+ ins_cost(DEFAULT_COST);
+
+ expand %{
+ regD tmpD;
+ moveL2D_reg(tmpD, src); // Move the long from the GPR into an FPR (mtfprd).
+ convL2FRaw_regF(dst, tmpD); // Convert to float.
+ %}
+%}
+
// Moved up as used in expand.
//instruct convD2F_reg(regF dst, regD src) %{%}
@@ -11009,6 +11270,7 @@
// Integer to Double conversion.
instruct convI2D_reg_Ex(regD dst, iRegIsrc src) %{
match(Set dst (ConvI2D src));
+ predicate(!VM_Version::has_mtfprd());
ins_cost(DEFAULT_COST);
expand %{
@@ -11022,6 +11284,19 @@
%}
%}
+// Integer to Double conversion. Special version for Power8.
+instruct convI2D_reg_mtfprd_Ex(regD dst, iRegIsrc src) %{
+ match(Set dst (ConvI2D src));
+ predicate(VM_Version::has_mtfprd());
+ ins_cost(DEFAULT_COST);
+
+ expand %{
+ regD tmpD;
+ moveI2D_reg(tmpD, src); // Move the int from the GPR into an FPR (mtfprwa).
+ convL2DRaw_regD(dst, tmpD); // Convert to double.
+ %}
+%}
+
// Long to Double conversion
instruct convL2D_reg_Ex(regD dst, stackSlotL src) %{
match(Set dst (ConvL2D src));
@@ -11034,6 +11309,19 @@
%}
%}
+// Long to Double conversion. Special version for Power8.
+instruct convL2D_reg_mtfprd_Ex(regD dst, iRegLsrc src) %{
+ match(Set dst (ConvL2D src));
+ predicate(VM_Version::has_mtfprd());
+ ins_cost(DEFAULT_COST);
+
+ expand %{
+ regD tmpD;
+ moveL2D_reg(tmpD, src); // Move the long from the GPR into an FPR (mtfprd).
+ convL2DRaw_regD(dst, tmpD); // Convert to double.
+ %}
+%}
+
instruct convF2D_reg(regD dst, regF src) %{
match(Set dst (ConvF2D src));
format %{ "FMR $dst, $src \t// float->double" %}
--- a/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp Tue Jul 04 15:11:25 2017 +0200
@@ -1472,13 +1472,13 @@
case Bytecodes::_i2d:
__ extsw(R17_tos, R17_tos);
case Bytecodes::_l2d:
- __ push_l_pop_d();
+ __ move_l_to_d();
__ fcfid(F15_ftos, F15_ftos);
break;
case Bytecodes::_i2f:
__ extsw(R17_tos, R17_tos);
- __ push_l_pop_d();
+ __ move_l_to_d();
if (VM_Version::has_fcfids()) { // fcfids is >= Power7 only
// Comment: alternatively, load with sign extend could be done by lfiwax.
__ fcfids(F15_ftos, F15_ftos);
@@ -1490,7 +1490,7 @@
case Bytecodes::_l2f:
if (VM_Version::has_fcfids()) { // fcfids is >= Power7 only
- __ push_l_pop_d();
+ __ move_l_to_d();
__ fcfids(F15_ftos, F15_ftos);
} else {
// Avoid rounding problem when result should be 0x3f800001: need fixup code before fcfid+frsp.
@@ -1514,7 +1514,7 @@
__ li(R17_tos, 0); // 0 in case of NAN
__ bso(CCR0, done);
__ fctiwz(F15_ftos, F15_ftos);
- __ push_d_pop_l();
+ __ move_d_to_l();
break;
case Bytecodes::_d2l:
@@ -1523,7 +1523,7 @@
__ li(R17_tos, 0); // 0 in case of NAN
__ bso(CCR0, done);
__ fctidz(F15_ftos, F15_ftos);
- __ push_d_pop_l();
+ __ move_d_to_l();
break;
default: ShouldNotReachHere();
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp Tue Jul 04 09:16:26 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp Tue Jul 04 15:11:25 2017 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -106,6 +106,7 @@
static bool has_vsx() { return (_features & vsx_m) != 0; }
static bool has_ldbrx() { return (_features & ldbrx_m) != 0; }
static bool has_stdbrx() { return (_features & stdbrx_m) != 0; }
+ static bool has_mtfprd() { return has_vpmsumb(); } // alias for P8: no separate feature bit is tested, the vpmsumb check stands in as the Power8 indicator
// Assembler testing
static void allow_all();