--- a/src/hotspot/cpu/ppc/ppc.ad Tue Oct 03 15:32:27 2017 -0700
+++ b/src/hotspot/cpu/ppc/ppc.ad Tue Oct 03 17:37:15 2017 -0700
@@ -254,6 +254,73 @@
reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
reg_def SR_PPR( SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg()); // v
+// ----------------------------
+// Vector-Scalar Registers VSR0..VSR63: ideal type Op_VecX, encoding equal to the register number, both save-type fields SOC, VMReg NULL
+// ----------------------------
+ reg_def VSR0 ( SOC, SOC, Op_VecX, 0, NULL);
+ reg_def VSR1 ( SOC, SOC, Op_VecX, 1, NULL);
+ reg_def VSR2 ( SOC, SOC, Op_VecX, 2, NULL);
+ reg_def VSR3 ( SOC, SOC, Op_VecX, 3, NULL);
+ reg_def VSR4 ( SOC, SOC, Op_VecX, 4, NULL);
+ reg_def VSR5 ( SOC, SOC, Op_VecX, 5, NULL);
+ reg_def VSR6 ( SOC, SOC, Op_VecX, 6, NULL);
+ reg_def VSR7 ( SOC, SOC, Op_VecX, 7, NULL);
+ reg_def VSR8 ( SOC, SOC, Op_VecX, 8, NULL);
+ reg_def VSR9 ( SOC, SOC, Op_VecX, 9, NULL);
+ reg_def VSR10 ( SOC, SOC, Op_VecX, 10, NULL);
+ reg_def VSR11 ( SOC, SOC, Op_VecX, 11, NULL);
+ reg_def VSR12 ( SOC, SOC, Op_VecX, 12, NULL);
+ reg_def VSR13 ( SOC, SOC, Op_VecX, 13, NULL);
+ reg_def VSR14 ( SOC, SOC, Op_VecX, 14, NULL);
+ reg_def VSR15 ( SOC, SOC, Op_VecX, 15, NULL);
+ reg_def VSR16 ( SOC, SOC, Op_VecX, 16, NULL);
+ reg_def VSR17 ( SOC, SOC, Op_VecX, 17, NULL);
+ reg_def VSR18 ( SOC, SOC, Op_VecX, 18, NULL);
+ reg_def VSR19 ( SOC, SOC, Op_VecX, 19, NULL);
+ reg_def VSR20 ( SOC, SOC, Op_VecX, 20, NULL);
+ reg_def VSR21 ( SOC, SOC, Op_VecX, 21, NULL);
+ reg_def VSR22 ( SOC, SOC, Op_VecX, 22, NULL);
+ reg_def VSR23 ( SOC, SOC, Op_VecX, 23, NULL);
+ reg_def VSR24 ( SOC, SOC, Op_VecX, 24, NULL);
+ reg_def VSR25 ( SOC, SOC, Op_VecX, 25, NULL);
+ reg_def VSR26 ( SOC, SOC, Op_VecX, 26, NULL);
+ reg_def VSR27 ( SOC, SOC, Op_VecX, 27, NULL);
+ reg_def VSR28 ( SOC, SOC, Op_VecX, 28, NULL);
+ reg_def VSR29 ( SOC, SOC, Op_VecX, 29, NULL);
+ reg_def VSR30 ( SOC, SOC, Op_VecX, 30, NULL);
+ reg_def VSR31 ( SOC, SOC, Op_VecX, 31, NULL);
+ reg_def VSR32 ( SOC, SOC, Op_VecX, 32, NULL);
+ reg_def VSR33 ( SOC, SOC, Op_VecX, 33, NULL);
+ reg_def VSR34 ( SOC, SOC, Op_VecX, 34, NULL);
+ reg_def VSR35 ( SOC, SOC, Op_VecX, 35, NULL);
+ reg_def VSR36 ( SOC, SOC, Op_VecX, 36, NULL);
+ reg_def VSR37 ( SOC, SOC, Op_VecX, 37, NULL);
+ reg_def VSR38 ( SOC, SOC, Op_VecX, 38, NULL);
+ reg_def VSR39 ( SOC, SOC, Op_VecX, 39, NULL);
+ reg_def VSR40 ( SOC, SOC, Op_VecX, 40, NULL);
+ reg_def VSR41 ( SOC, SOC, Op_VecX, 41, NULL);
+ reg_def VSR42 ( SOC, SOC, Op_VecX, 42, NULL);
+ reg_def VSR43 ( SOC, SOC, Op_VecX, 43, NULL);
+ reg_def VSR44 ( SOC, SOC, Op_VecX, 44, NULL);
+ reg_def VSR45 ( SOC, SOC, Op_VecX, 45, NULL);
+ reg_def VSR46 ( SOC, SOC, Op_VecX, 46, NULL);
+ reg_def VSR47 ( SOC, SOC, Op_VecX, 47, NULL);
+ reg_def VSR48 ( SOC, SOC, Op_VecX, 48, NULL);
+ reg_def VSR49 ( SOC, SOC, Op_VecX, 49, NULL);
+ reg_def VSR50 ( SOC, SOC, Op_VecX, 50, NULL);
+ reg_def VSR51 ( SOC, SOC, Op_VecX, 51, NULL);
+ reg_def VSR52 ( SOC, SOC, Op_VecX, 52, NULL);
+ reg_def VSR53 ( SOC, SOC, Op_VecX, 53, NULL);
+ reg_def VSR54 ( SOC, SOC, Op_VecX, 54, NULL);
+ reg_def VSR55 ( SOC, SOC, Op_VecX, 55, NULL);
+ reg_def VSR56 ( SOC, SOC, Op_VecX, 56, NULL);
+ reg_def VSR57 ( SOC, SOC, Op_VecX, 57, NULL);
+ reg_def VSR58 ( SOC, SOC, Op_VecX, 58, NULL);
+ reg_def VSR59 ( SOC, SOC, Op_VecX, 59, NULL);
+ reg_def VSR60 ( SOC, SOC, Op_VecX, 60, NULL);
+ reg_def VSR61 ( SOC, SOC, Op_VecX, 61, NULL);
+ reg_def VSR62 ( SOC, SOC, Op_VecX, 62, NULL);
+ reg_def VSR63 ( SOC, SOC, Op_VecX, 63, NULL);
// ----------------------------
// Specify priority of register selection within phases of register
@@ -395,6 +462,73 @@
SR_PPR
);
+alloc_class chunk4 (
+ VSR0, // chunk4 lists VSR0..VSR63 in ascending order; presumably the listing order is the selection priority (see the header comment of this section) - confirm
+ VSR1,
+ VSR2,
+ VSR3,
+ VSR4,
+ VSR5,
+ VSR6,
+ VSR7,
+ VSR8,
+ VSR9,
+ VSR10,
+ VSR11,
+ VSR12,
+ VSR13,
+ VSR14,
+ VSR15,
+ VSR16,
+ VSR17,
+ VSR18,
+ VSR19,
+ VSR20,
+ VSR21,
+ VSR22,
+ VSR23,
+ VSR24,
+ VSR25,
+ VSR26,
+ VSR27,
+ VSR28,
+ VSR29,
+ VSR30,
+ VSR31,
+ VSR32,
+ VSR33,
+ VSR34,
+ VSR35,
+ VSR36,
+ VSR37,
+ VSR38,
+ VSR39,
+ VSR40,
+ VSR41,
+ VSR42,
+ VSR43,
+ VSR44,
+ VSR45,
+ VSR46,
+ VSR47,
+ VSR48,
+ VSR49,
+ VSR50,
+ VSR51,
+ VSR52,
+ VSR53,
+ VSR54,
+ VSR55,
+ VSR56,
+ VSR57,
+ VSR58,
+ VSR59,
+ VSR60,
+ VSR61,
+ VSR62,
+ VSR63
+);
+
//-------Architecture Description Register Classes-----------------------
// Several register classes are automatically defined based upon
@@ -769,6 +903,73 @@
F31, F31_H // nv!
);
+// Class for all 128bit vector registers (VSR0..VSR63); referenced by operand vecX. NOTE(review): per the Power ISA, VSR0-31 overlay FPR0-31 and VSR32-63 overlay VR0-31 - confirm the allocator is not handed registers that alias live F0..F31 values.
+reg_class vectorx_reg(VSR0,
+ VSR1,
+ VSR2,
+ VSR3,
+ VSR4,
+ VSR5,
+ VSR6,
+ VSR7,
+ VSR8,
+ VSR9,
+ VSR10,
+ VSR11,
+ VSR12,
+ VSR13,
+ VSR14,
+ VSR15,
+ VSR16,
+ VSR17,
+ VSR18,
+ VSR19,
+ VSR20,
+ VSR21,
+ VSR22,
+ VSR23,
+ VSR24,
+ VSR25,
+ VSR26,
+ VSR27,
+ VSR28,
+ VSR29,
+ VSR30,
+ VSR31,
+ VSR32,
+ VSR33,
+ VSR34,
+ VSR35,
+ VSR36,
+ VSR37,
+ VSR38,
+ VSR39,
+ VSR40,
+ VSR41,
+ VSR42,
+ VSR43,
+ VSR44,
+ VSR45,
+ VSR46,
+ VSR47,
+ VSR48,
+ VSR49,
+ VSR50,
+ VSR51,
+ VSR52,
+ VSR53,
+ VSR54,
+ VSR55,
+ VSR56,
+ VSR57,
+ VSR58,
+ VSR59,
+ VSR60,
+ VSR61,
+ VSR62,
+ VSR63
+);
+
%}
//----------DEFINITION BLOCK---------------------------------------------------
@@ -2048,14 +2249,24 @@
// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
- assert(MaxVectorSize == 8, "");
- return 8;
+ if (VM_Version::has_vsx()) { // VSX path: vectors are 16 bytes wide; bt is not consulted
+ assert(MaxVectorSize == 16, ""); // MaxVectorSize must already agree with the chosen width
+ return 16;
+ } else { // no VSX: keep the previous fixed 8-byte width
+ assert(MaxVectorSize == 8, "");
+ return 8;
+ }
}
// Vector ideal reg.
const uint Matcher::vector_ideal_reg(int size) {
- assert(MaxVectorSize == 8 && size == 8, "");
- return Op_RegL;
+ if (VM_Version::has_vsx()) { // VSX path: 16-byte vectors use the Op_VecX ideal register
+ assert(MaxVectorSize == 16 && size == 16, ""); // only size 16 is accepted here
+ return Op_VecX;
+ } else { // no VSX: 8-byte vectors are kept in a long register (Op_RegL), as before
+ assert(MaxVectorSize == 8 && size == 8, "");
+ return Op_RegL;
+ }
}
const uint Matcher::vector_shift_count_ideal_reg(int size) {
@@ -2075,7 +2286,10 @@
// PPC doesn't support misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
- return false;
+ if (VM_Version::has_vsx()) // with VSX the blanket statement in the header comment no longer applies
+ return !AlignVector; // misaligned vector access is reported as OK unless the AlignVector flag is set
+ else
+ return false; // without VSX: unchanged, never OK
}
// PPC AES support not yet implemented
@@ -2217,10 +2431,31 @@
F13_num
};
+const MachRegisterNumbers vsarg_reg[64] = { // VSR0..VSR63 in ascending order, each register exactly once
+ VSR0_num, VSR1_num, VSR2_num, VSR3_num,
+ VSR4_num, VSR5_num, VSR6_num, VSR7_num,
+ VSR8_num, VSR9_num, VSR10_num, VSR11_num,
+ VSR12_num, VSR13_num, VSR14_num, VSR15_num,
+ VSR16_num, VSR17_num, VSR18_num, VSR19_num,
+ VSR20_num, VSR21_num, VSR22_num, VSR23_num,
+ VSR24_num, VSR25_num, VSR26_num, VSR27_num,
+ VSR28_num, VSR29_num, VSR30_num, VSR31_num,
+ VSR32_num, VSR33_num, VSR34_num, VSR35_num,
+ VSR36_num, VSR37_num, VSR38_num, VSR39_num,
+ VSR40_num, VSR41_num, VSR42_num, VSR43_num,
+ VSR44_num, VSR45_num, VSR46_num, VSR47_num,
+ VSR48_num, VSR49_num, VSR50_num, VSR51_num,
+ VSR52_num, VSR53_num, VSR54_num, VSR55_num,
+ VSR56_num, VSR57_num, VSR58_num, VSR59_num,
+ VSR60_num, VSR61_num, VSR62_num, VSR63_num
+};
+
const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
+const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]); // element count of vsarg_reg (64)
+
// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
@@ -2552,6 +2787,115 @@
return nodes;
}
+typedef struct { // result bundle filled in by loadConLReplicatedNodesTuple_create; slots not used by the taken path are NULL
+ loadConL_hiNode *_large_hi; // large-constant-pool path: first load node (takes the toc input)
+ loadConL_loNode *_large_lo; // large-constant-pool path: second load node (takes _large_hi as input)
+ mtvsrdNode *_moved; // node that moves the loaded long into a vector register
+ xxspltdNode *_replicated; // node that splats the moved value
+ loadConLNode *_small; // small-constant-pool path: the single load node
+ MachNode *_last; // last node of the chain; the creator sets it to _replicated
+} loadConLReplicatedNodesTuple;
+// Builds the node chain "load long constant immSrc via toc -> mtvsrd -> xxspltd" and records the caller-supplied registers for each node in ra_. Returns the nodes; C and dst are not used in the body.
+loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
+ vecXOper *dst, immI_0Oper *zero,
+ OptoReg::Name reg_second, OptoReg::Name reg_first,
+ OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
+ loadConLReplicatedNodesTuple nodes;
+ // Only the large-constant-pool branch is reachable while the flag below is hard-wired to true.
+ const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
+ if (large_constant_pool) {
+ // Create new nodes.
+ loadConL_hiNode *m1 = new loadConL_hiNode();
+ loadConL_loNode *m2 = new loadConL_loNode();
+ mtvsrdNode *m3 = new mtvsrdNode();
+ xxspltdNode *m4 = new xxspltdNode();
+
+ // inputs for new nodes: chain toc -> m1 -> m2 -> m3 -> m4
+ m1->add_req(NULL, toc);
+ m2->add_req(NULL, m1);
+ m3->add_req(NULL, m2);
+ m4->add_req(NULL, m3);
+
+ // operands for new nodes
+ m1->_opnds[0] = new iRegLdstOper(); // dst
+ m1->_opnds[1] = immSrc; // src
+ m1->_opnds[2] = new iRegPdstOper(); // toc
+
+ m2->_opnds[0] = new iRegLdstOper(); // dst
+ m2->_opnds[1] = immSrc; // src
+ m2->_opnds[2] = new iRegLdstOper(); // base
+
+ m3->_opnds[0] = new vecXOper(); // dst
+ m3->_opnds[1] = new iRegLdstOper(); // src
+
+ m4->_opnds[0] = new vecXOper(); // dst
+ m4->_opnds[1] = new vecXOper(); // src
+ m4->_opnds[2] = zero;
+
+ // Initialize ins_attrib TOC fields.
+ m1->_const_toc_offset = -1;
+ m2->_const_toc_offset_hi_node = m1;
+
+ // Initialize ins_attrib instruction offset.
+ m1->_cbuf_insts_offset = -1;
+
+ // register allocation for new nodes
+ ra_->set_pair(m1->_idx, reg_second, reg_first);
+ ra_->set_pair(m2->_idx, reg_second, reg_first);
+ ra_->set1(m3->_idx, reg_second); // NOTE(review): immediately followed by set2 on the same node - presumably only the set2 assignment is effective; confirm
+ ra_->set2(m3->_idx, reg_vec_first);
+ ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
+
+ // Create result.
+ nodes._large_hi = m1;
+ nodes._large_lo = m2;
+ nodes._moved = m3;
+ nodes._replicated = m4;
+ nodes._small = NULL;
+ nodes._last = nodes._replicated;
+ assert(m2->bottom_type()->isa_long(), "must be long");
+ } else {
+ loadConLNode *m2 = new loadConLNode();
+ mtvsrdNode *m3 = new mtvsrdNode();
+ xxspltdNode *m4 = new xxspltdNode();
+
+ // inputs for new nodes: chain toc -> m2 -> m3 -> m4 (m3 and m4 were previously left without inputs)
+ m2->add_req(NULL, toc);
+ m3->add_req(NULL, m2);
+ m4->add_req(NULL, m3);
+
+ // operands for new nodes
+ m2->_opnds[0] = new iRegLdstOper(); // dst
+ m2->_opnds[1] = immSrc; // src
+ m2->_opnds[2] = new iRegPdstOper(); // toc
+ m3->_opnds[0] = new vecXOper(); // dst
+ m3->_opnds[1] = new iRegLdstOper(); // src
+ m4->_opnds[0] = new vecXOper(); // dst
+ m4->_opnds[1] = new vecXOper(); // src
+ m4->_opnds[2] = zero;
+
+ // Initialize ins_attrib instruction offset.
+ m2->_cbuf_insts_offset = -1;
+
+ // register allocation for new nodes
+ ra_->set_pair(m2->_idx, reg_second, reg_first);
+ ra_->set1(m3->_idx, reg_second);
+ ra_->set2(m3->_idx, reg_vec_first);
+ ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
+
+ // Create result.
+ nodes._large_hi = NULL;
+ nodes._large_lo = NULL;
+ nodes._small = m2;
+ nodes._moved = m3;
+ nodes._replicated = m4;
+ nodes._last = nodes._replicated;
+ assert(m2->bottom_type()->isa_long(), "must be long");
+ }
+
+ return nodes;
+}
+
%} // source
encode %{
@@ -3212,6 +3556,27 @@
assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
%}
+ enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc) %{
+ // Post-alloc expansion: loads the replicated float bit pattern as a long constant and splats it into the vector register allocated to this node.
+ // NOTE(review): the GPR temp is hard-coded to R20/R20_H and is not declared on the instruct - looks like it can clobber a live value; confirm.
+ // Make an operand with the bit pattern to load as float.
+ immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
+ immI_0Oper *op_zero = new immI_0Oper(0);
+
+ loadConLReplicatedNodesTuple loadConLNodes =
+ loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
+ OptoReg::Name(R20_H_num), OptoReg::Name(R20_num),
+ ra_->get_reg_second(this), ra_->get_reg_first(this));
+
+ // Push new nodes in dependency order; exactly one of (_large_hi, _large_lo) or _small is non-NULL.
+ if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
+ if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
+ if (loadConLNodes._small) { nodes->push(loadConLNodes._small); }
+ if (loadConLNodes._moved) { nodes->push(loadConLNodes._moved); }
+ if (loadConLNodes._last) { nodes->push(loadConLNodes._last); }
+ assert(nodes->length() >= 1, "must have created at least 1 node");
+ %}
+
// This enc_class is needed so that scheduler gets proper
// input mapping for latency computation.
enc_class enc_poll(immI dst, iRegLdst poll) %{
@@ -3840,6 +4205,14 @@
//
// Formats are generated automatically for constants and base registers.
+operand vecX() %{
+ constraint(ALLOC_IN_RC(vectorx_reg)); // allocated from register class vectorx_reg
+ match(VecX);
+ // Register operand with an empty format.
+ format %{ %}
+ interface(REG_INTER);
+%}
+
//----------Simple Operands----------------------------------------------------
// Immediate Operands
@@ -5372,6 +5745,20 @@
ins_pipe(pipe_class_memory);
%}
+// Load 16-byte vector: a single LXVD2X through an indirect (register-only) address
+instruct loadV16(vecX dst, indirect mem) %{
+ predicate(n->as_LoadVector()->memory_size() == 16); // only LoadVector nodes of 16 bytes
+ match(Set dst (LoadVector mem));
+ ins_cost(MEMORY_REF_COST);
+ // One 4-byte instruction is emitted (size(4)).
+ format %{ "LXVD2X $dst, $mem \t// load 16-byte Vector" %}
+ size(4);
+ ins_encode %{
+ __ lxvd2x($dst$$VectorSRegister, $mem$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
// Load Range, range = array length (=jint)
instruct loadRange(iRegIdst dst, memory mem) %{
match(Set dst (LoadRange mem));
@@ -6368,6 +6755,20 @@
ins_pipe(pipe_class_memory);
%}
+// Store 16-byte vector: a single STXVD2X through an indirect (register-only) address
+instruct storeV16(indirect mem, vecX src) %{
+ predicate(n->as_StoreVector()->memory_size() == 16); // only StoreVector nodes of 16 bytes
+ match(Set mem (StoreVector mem src));
+ ins_cost(MEMORY_REF_COST);
+ // One 4-byte instruction is emitted (size(4)).
+ format %{ "STXVD2X $mem, $src \t// store 16-byte Vector" %}
+ size(4);
+ ins_encode %{
+ __ stxvd2x($src$$VectorSRegister, $mem$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
match(Set dst (StoreN dst src));
@@ -13239,6 +13640,26 @@
ins_pipe(pipe_class_default);
%}
+instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
+ effect(DEF temp1, USE src); // no match rule: helper node, instantiated from repl16B_reg_Ex's expand
+ // No format is declared for this node (unlike mtvsrd).
+ size(4);
+ ins_encode %{
+ __ mtvsrwz($temp1$$VectorSRegister, $src$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
+ effect(DEF dst, USE src, USE imm1); // no match rule: helper node, instantiated from repl16B_reg_Ex's expand
+ // No format is declared; imm1 is passed to the assembler as the splat selector.
+ size(4);
+ ins_encode %{
+ __ xxspltw($dst$$VectorSRegister, $src$$VectorSRegister, $imm1$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
//---------- Replicate Vector Instructions ------------------------------------
// Insrdi does replicate if src == dst.
@@ -13318,6 +13739,46 @@
ins_pipe(pipe_class_default);
%}
+instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
+ match(Set dst (ReplicateB src));
+ predicate(n->as_Vector()->length() == 16); // 16-element vectors only
+ // Expands to: copy src, two in-register replicate steps, move into a vector register, word splat.
+ expand %{
+ iRegLdst tmpL;
+ vecX tmpV;
+ immI8 imm1 %{ (int) 1 %}
+ moveReg(tmpL, src);
+ repl56(tmpL); // presumably widens the replicated pattern inside tmpL (defined outside this view) - confirm
+ repl48(tmpL);
+ mtvsrwz(tmpV, tmpL);
+ xxspltw(dst, tmpV, imm1); // splat with selector constant 1
+ %}
+%}
+
+instruct repl16B_immI0(vecX dst, immI_0 zero) %{
+ match(Set dst (ReplicateB zero));
+ predicate(n->as_Vector()->length() == 16); // 16-element vectors only
+ // Zero vector: dst is XORed with itself; the $zero operand is not used by the encoding.
+ format %{ "XXLXOR $dst, $zero \t// replicate16B" %}
+ size(4);
+ ins_encode %{
+ __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
+ match(Set dst (ReplicateB src));
+ predicate(n->as_Vector()->length() == 16); // 16-element vectors only
+ // XXLEQV of dst with itself (logical equivalence, i.e. all bits set); $src is not used by the encoding.
+ format %{ "XXLEQV $dst, $src \t// replicate16B" %}
+ size(4);
+ ins_encode %{
+ __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
match(Set dst (ReplicateS src));
predicate(n->as_Vector()->length() == 4);
@@ -13352,6 +13813,46 @@
ins_pipe(pipe_class_default);
%}
+instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
+ match(Set dst (ReplicateS src));
+ predicate(n->as_Vector()->length() == 8); // 8-element vectors only
+ // Expands to: copy src, two in-register replicate steps, move the long into a vector register, doubleword permute.
+ expand %{
+ iRegLdst tmpL;
+ vecX tmpV;
+ immI8 zero %{ (int) 0 %}
+ moveReg(tmpL, src);
+ repl48(tmpL); // presumably widens the replicated pattern inside tmpL (defined outside this view) - confirm
+ repl32(tmpL);
+ mtvsrd(tmpV, tmpL);
+ xxpermdi(dst, tmpV, tmpV, zero); // same source twice with selector 0
+ %}
+%}
+
+instruct repl8S_immI0(vecX dst, immI_0 zero) %{
+ match(Set dst (ReplicateS zero));
+ predicate(n->as_Vector()->length() == 8); // 8-element vectors only
+ // Zero vector: dst is XORed with itself; the $zero operand is not used by the encoding.
+ format %{ "XXLXOR $dst, $zero \t// replicate8S" %}
+ size(4);
+ ins_encode %{
+ __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
+ match(Set dst (ReplicateS src));
+ predicate(n->as_Vector()->length() == 8); // 8-element vectors only
+ // XXLEQV of dst with itself (logical equivalence, i.e. all bits set); $src is not used by the encoding.
+ format %{ "XXLEQV $dst, $src \t// replicate8S" %}
+ size(4);
+ ins_encode %{
+ __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 2);
@@ -13386,6 +13887,46 @@
ins_pipe(pipe_class_default);
%}
+instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
+ match(Set dst (ReplicateI src));
+ predicate(n->as_Vector()->length() == 4); // 4-element vectors only
+ ins_cost(2 * DEFAULT_COST);
+ // Expands to: copy src, one in-register replicate step, move the long into a vector register, doubleword permute.
+ expand %{
+ iRegLdst tmpL;
+ vecX tmpV;
+ immI8 zero %{ (int) 0 %}
+ moveReg(tmpL, src);
+ repl32(tmpL); // presumably duplicates the 32-bit pattern inside tmpL (defined outside this view) - confirm
+ mtvsrd(tmpV, tmpL);
+ xxpermdi(dst, tmpV, tmpV, zero); // same source twice with selector 0
+ %}
+%}
+
+instruct repl4I_immI0(vecX dst, immI_0 zero) %{
+ match(Set dst (ReplicateI zero));
+ predicate(n->as_Vector()->length() == 4); // 4-element vectors only
+ // Zero vector: dst is XORed with itself; the $zero operand is not used by the encoding.
+ format %{ "XXLXOR $dst, $zero \t// replicate4I" %}
+ size(4);
+ ins_encode %{
+ __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
+ match(Set dst (ReplicateI src));
+ predicate(n->as_Vector()->length() == 4); // 4-element vectors only
+ // XXLEQV of dst with itself (logical equivalence, i.e. all bits set); $src is not used by the encoding.
+ format %{ "XXLEQV $dst, $dst, $dst \t// replicate4I" %}
+ size(4);
+ ins_encode %{
+ __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
// Move float to int register via stack, replicate.
instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
match(Set dst (ReplicateF src));
@@ -13484,6 +14025,154 @@
%}
+instruct repl4F_reg_Ex(vecX dst, regF src) %{
+ match(Set dst (ReplicateF src));
+ predicate(n->as_Vector()->length() == 4); // 4-element vectors only
+ ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
+ expand %{
+ stackSlotL tmpS;
+ iRegIdst tmpI;
+ iRegLdst tmpL;
+ vecX tmpV;
+ immI8 zero %{ (int) 0 %}
+ // The float's bits go through a stack slot into an integer register before being replicated.
+ moveF2I_reg_stack(tmpS, src); // Move float to stack.
+ moveF2I_stack_reg(tmpI, tmpS); // Move stack to int reg.
+ moveReg(tmpL, tmpI); // Move int to long reg.
+ repl32(tmpL); // Replicate bitpattern.
+ mtvsrd(tmpV, tmpL); // Move the long into a vector register.
+ xxpermdi(dst, tmpV, tmpV, zero); // Same source twice with selector 0.
+ %}
+%}
+
+instruct repl4F_immF_Ex(vecX dst, immF src) %{
+ match(Set dst (ReplicateF src));
+ predicate(n->as_Vector()->length() == 4); // 4-element vectors only
+ ins_cost(10 * DEFAULT_COST);
+ // Float constant: expanded after register allocation by the enc_class named below, with the constant table base as third argument.
+ postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase) );
+%}
+
+instruct repl4F_immF0(vecX dst, immF_0 zero) %{
+ match(Set dst (ReplicateF zero));
+ predicate(n->as_Vector()->length() == 4); // 4-element vectors only; zero vector via dst XOR dst, $zero is not used by the encoding
+ format %{ "XXLXOR $dst, $zero \t// replicate4F" %}
+ size(4); // single 4-byte instruction, declared like the other single-instruction replicate nodes
+ ins_encode %{
+ __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl2D_reg_Ex(vecX dst, regD src) %{
+ match(Set dst (ReplicateD src));
+ predicate(n->as_Vector()->length() == 2); // 2-element vectors only
+ expand %{
+ stackSlotL tmpS;
+ iRegLdst tmpL;
+ vecX tmpV;
+ immI8 zero %{ (int) 0 %}
+ // The double's bits go through a stack slot into a long register, then into a vector register.
+ moveD2L_reg_stack(tmpS, src);
+ moveD2L_stack_reg(tmpL, tmpS);
+ mtvsrd(tmpV, tmpL);
+ xxpermdi(dst, tmpV, tmpV, zero); // same source twice with selector 0
+ %}
+%}
+
+instruct repl2D_immI0(vecX dst, immI_0 zero) %{
+ match(Set dst (ReplicateD zero)); // NOTE(review): operand is an int zero (immI_0) under ReplicateD - confirm this rule can match a double constant
+ predicate(n->as_Vector()->length() == 2); // 2-element vectors only
+ // Zero vector: dst is XORed with itself; the $zero operand is not used by the encoding.
+ format %{ "XXLXOR $dst, $zero \t// replicate2D" %}
+ size(4);
+ ins_encode %{
+ __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl2D_immIminus1(vecX dst, immI_minus1 src) %{
+ match(Set dst (ReplicateD src)); // NOTE(review): operand is an int -1 (immI_minus1) under ReplicateD - confirm this rule can match
+ predicate(n->as_Vector()->length() == 2); // 2-element vectors only
+ // XXLEQV of dst with itself (logical equivalence, i.e. all bits set); $src is not used by the encoding.
+ format %{ "XXLEQV $dst, $src \t// replicate2D" %}
+ size(4);
+ ins_encode %{
+ __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct mtvsrd(vecX dst, iRegLsrc src) %{
+ predicate(false); // no match rule: helper node, instantiated from expand rules and from loadConLReplicatedNodesTuple_create
+ effect(DEF dst, USE src);
+ // Moves the long register src into the vector register dst with one 4-byte instruction.
+ format %{ "MTVSRD $dst, $src \t// Move to 16-byte register"%}
+ size(4);
+ ins_encode %{
+ __ mtvsrd($dst$$VectorSRegister, $src$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
+ effect(DEF dst, USE src, USE zero); // no match rule: helper node, created by loadConLReplicatedNodesTuple_create
+ // Encoded as XXPERMDI with src used for both inputs and $zero as the selector.
+ format %{ "XXSPLATD $dst, $src, $zero \t// Permute 16-byte register"%}
+ size(4);
+ ins_encode %{
+ __ xxpermdi($dst$$VectorSRegister, $src$$VectorSRegister, $src$$VectorSRegister, $zero$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
+ effect(DEF dst, USE src1, USE src2, USE zero); // no match rule: helper node, instantiated from the repl*_reg_Ex expand rules
+ // The immediate operand is named zero but is passed through unchanged as the permute selector.
+ format %{ "XXPERMDI $dst, $src1, $src2, $zero \t// Permute 16-byte register"%}
+ size(4);
+ ins_encode %{
+ __ xxpermdi($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister, $zero$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
+ match(Set dst (ReplicateL src));
+ predicate(n->as_Vector()->length() == 2); // 2-element vectors only
+ expand %{
+ vecX tmpV;
+ immI8 zero %{ (int) 0 %}
+ mtvsrd(tmpV, src); // move the long into a vector register
+ xxpermdi(dst, tmpV, tmpV, zero); // same source twice with selector 0
+ %}
+%}
+
+instruct repl2L_immI0(vecX dst, immI_0 zero) %{
+ match(Set dst (ReplicateL zero)); // NOTE(review): operand is an int zero (immI_0) under ReplicateL - confirm this rule can match a long constant
+ predicate(n->as_Vector()->length() == 2); // 2-element vectors only
+ // Zero vector: dst is XORed with itself; the $zero operand is not used by the encoding.
+ format %{ "XXLXOR $dst, $zero \t// replicate2L" %}
+ size(4);
+ ins_encode %{
+ __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
+ match(Set dst (ReplicateL src)); // NOTE(review): operand is an int -1 (immI_minus1) under ReplicateL - confirm this rule can match
+ predicate(n->as_Vector()->length() == 2); // 2-element vectors only
+ // XXLEQV of dst with itself (logical equivalence, i.e. all bits set); $src is not used by the encoding.
+ format %{ "XXLEQV $dst, $src \t// replicate2L" %}
+ size(4);
+ ins_encode %{
+ __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
// ============================================================================
// Safepoint Instruction