8208171: PPC64: Enrich SLP support
author: mhorie
date: Wed, 12 Sep 2018 14:24:17 +0200
changeset 51711 3aafd7015d87
parent 51710 355bd23b46e5
child 51712 f0f5d23449d3
8208171: PPC64: Enrich SLP support Reviewed-by: mdoerr, gromero
src/hotspot/cpu/ppc/assembler_ppc.hpp
src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
src/hotspot/cpu/ppc/ppc.ad
src/hotspot/cpu/ppc/register_definitions_ppc.cpp
src/hotspot/cpu/ppc/register_ppc.cpp
src/hotspot/cpu/ppc/register_ppc.hpp
--- a/src/hotspot/cpu/ppc/assembler_ppc.hpp	Wed Sep 12 12:54:16 2018 +0200
+++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp	Wed Sep 12 14:24:17 2018 +0200
@@ -521,6 +521,18 @@
     XXLOR_OPCODE   = (60u << OPCODE_SHIFT |  146u << 3),
     XXLXOR_OPCODE  = (60u << OPCODE_SHIFT |  154u << 3),
     XXLEQV_OPCODE  = (60u << OPCODE_SHIFT |  186u << 3),
+    XVDIVSP_OPCODE = (60u << OPCODE_SHIFT |   88u << 3),
+    XVDIVDP_OPCODE = (60u << OPCODE_SHIFT |  120u << 3),
+    XVABSSP_OPCODE = (60u << OPCODE_SHIFT |  409u << 2),
+    XVABSDP_OPCODE = (60u << OPCODE_SHIFT |  473u << 2),
+    XVNEGSP_OPCODE = (60u << OPCODE_SHIFT |  441u << 2),
+    XVNEGDP_OPCODE = (60u << OPCODE_SHIFT |  505u << 2),
+    XVSQRTSP_OPCODE= (60u << OPCODE_SHIFT |  139u << 2),
+    XVSQRTDP_OPCODE= (60u << OPCODE_SHIFT |  203u << 2),
+    XVADDDP_OPCODE = (60u << OPCODE_SHIFT |   96u << 3),
+    XVSUBDP_OPCODE = (60u << OPCODE_SHIFT |  104u << 3),
+    XVMULSP_OPCODE = (60u << OPCODE_SHIFT |   80u << 3),
+    XVMULDP_OPCODE = (60u << OPCODE_SHIFT |  112u << 3),
 
     // Vector Permute and Formatting
     VPKPX_OPCODE   = (4u  << OPCODE_SHIFT |  782u     ),
@@ -574,6 +586,7 @@
     VADDUBS_OPCODE = (4u  << OPCODE_SHIFT |  512u     ),
     VADDUWS_OPCODE = (4u  << OPCODE_SHIFT |  640u     ),
     VADDUHS_OPCODE = (4u  << OPCODE_SHIFT |  576u     ),
+    VADDFP_OPCODE  = (4u  << OPCODE_SHIFT |   10u     ),
     VSUBCUW_OPCODE = (4u  << OPCODE_SHIFT | 1408u     ),
     VSUBSHS_OPCODE = (4u  << OPCODE_SHIFT | 1856u     ),
     VSUBSBS_OPCODE = (4u  << OPCODE_SHIFT | 1792u     ),
@@ -581,9 +594,11 @@
     VSUBUBM_OPCODE = (4u  << OPCODE_SHIFT | 1024u     ),
     VSUBUWM_OPCODE = (4u  << OPCODE_SHIFT | 1152u     ),
     VSUBUHM_OPCODE = (4u  << OPCODE_SHIFT | 1088u     ),
+    VSUBUDM_OPCODE = (4u  << OPCODE_SHIFT | 1216u     ),
     VSUBUBS_OPCODE = (4u  << OPCODE_SHIFT | 1536u     ),
     VSUBUWS_OPCODE = (4u  << OPCODE_SHIFT | 1664u     ),
     VSUBUHS_OPCODE = (4u  << OPCODE_SHIFT | 1600u     ),
+    VSUBFP_OPCODE  = (4u  << OPCODE_SHIFT |   74u     ),
 
     VMULESB_OPCODE = (4u  << OPCODE_SHIFT |  776u     ),
     VMULEUB_OPCODE = (4u  << OPCODE_SHIFT |  520u     ),
@@ -592,7 +607,9 @@
     VMULOSB_OPCODE = (4u  << OPCODE_SHIFT |  264u     ),
     VMULOUB_OPCODE = (4u  << OPCODE_SHIFT |    8u     ),
     VMULOSH_OPCODE = (4u  << OPCODE_SHIFT |  328u     ),
+    VMULOSW_OPCODE = (4u  << OPCODE_SHIFT |  392u     ),
     VMULOUH_OPCODE = (4u  << OPCODE_SHIFT |   72u     ),
+    VMULUWM_OPCODE = (4u  << OPCODE_SHIFT |  137u     ),
     VMHADDSHS_OPCODE=(4u  << OPCODE_SHIFT |   32u     ),
     VMHRADDSHS_OPCODE=(4u << OPCODE_SHIFT |   33u     ),
     VMLADDUHM_OPCODE=(4u  << OPCODE_SHIFT |   34u     ),
@@ -602,6 +619,7 @@
     VMSUMSHS_OPCODE= (4u  << OPCODE_SHIFT |   41u     ),
     VMSUMUHM_OPCODE= (4u  << OPCODE_SHIFT |   38u     ),
     VMSUMUHS_OPCODE= (4u  << OPCODE_SHIFT |   39u     ),
+    VMADDFP_OPCODE = (4u  << OPCODE_SHIFT |   46u     ),
 
     VSUMSWS_OPCODE = (4u  << OPCODE_SHIFT | 1928u     ),
     VSUM2SWS_OPCODE= (4u  << OPCODE_SHIFT | 1672u     ),
@@ -657,6 +675,7 @@
     VSRAB_OPCODE   = (4u  << OPCODE_SHIFT |  772u     ),
     VSRAW_OPCODE   = (4u  << OPCODE_SHIFT |  900u     ),
     VSRAH_OPCODE   = (4u  << OPCODE_SHIFT |  836u     ),
+    VPOPCNTW_OPCODE= (4u  << OPCODE_SHIFT | 1923u     ),
 
     // Vector Floating-Point
     // not implemented yet
@@ -2059,6 +2078,7 @@
   inline void vaddubs(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vadduws(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vadduhs(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vaddfp(   VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vsubcuw(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vsubshs(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vsubsbs(  VectorRegister d, VectorRegister a, VectorRegister b);
@@ -2066,9 +2086,11 @@
   inline void vsububm(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vsubuwm(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vsubuhm(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsubudm(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vsububs(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vsubuws(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vsubuhs(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsubfp(   VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vmulesb(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vmuleub(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vmulesh(  VectorRegister d, VectorRegister a, VectorRegister b);
@@ -2076,7 +2098,9 @@
   inline void vmulosb(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vmuloub(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vmulosh(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmulosw(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vmulouh(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmuluwm(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vmhaddshs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
   inline void vmhraddshs(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c);
   inline void vmladduhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
@@ -2086,6 +2110,7 @@
   inline void vmsumshs( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
   inline void vmsumuhm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
   inline void vmsumuhs( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
+  inline void vmaddfp(  VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
   inline void vsumsws(  VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vsum2sws( VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vsum4sbs( VectorRegister d, VectorRegister a, VectorRegister b);
@@ -2146,6 +2171,7 @@
   inline void vsrab(    VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vsraw(    VectorRegister d, VectorRegister a, VectorRegister b);
   inline void vsrah(    VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vpopcntw( VectorRegister d, VectorRegister b);
   // Vector Floating-Point not implemented yet
   inline void mtvscr(   VectorRegister b);
   inline void mfvscr(   VectorRegister d);
@@ -2168,6 +2194,18 @@
   inline void xxlor(    VectorSRegister d, VectorSRegister a, VectorSRegister b);
   inline void xxlxor(   VectorSRegister d, VectorSRegister a, VectorSRegister b);
   inline void xxleqv(   VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xvdivsp(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xvdivdp(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xvabssp(  VectorSRegister d, VectorSRegister b);
+  inline void xvabsdp(  VectorSRegister d, VectorSRegister b);
+  inline void xvnegsp(  VectorSRegister d, VectorSRegister b);
+  inline void xvnegdp(  VectorSRegister d, VectorSRegister b);
+  inline void xvsqrtsp( VectorSRegister d, VectorSRegister b);
+  inline void xvsqrtdp( VectorSRegister d, VectorSRegister b);
+  inline void xvadddp(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xvsubdp(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xvmulsp(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xvmuldp(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
 
   // VSX Extended Mnemonics
   inline void xxspltd(  VectorSRegister d, VectorSRegister a, int x);
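Note on the new opcode constants above: they follow the Power ISA XX-form layout. Three-operand (XX3-form) instructions such as XVADDDP carry an 8-bit extended opcode in bits 21-28, hence the "<< 3"; the unary (XX2-form) abs/neg/sqrt instructions carry a 9-bit extended opcode in bits 21-29, hence the "<< 2". A minimal standalone sketch, assuming OPCODE_SHIFT is 26 (the shift that places the 6-bit PowerPC primary opcode in the top bits):

#include <cassert>
#include <cstdint>
#include <cstdio>

const int OPCODE_SHIFT = 26;  // 6-bit primary opcode in the top bits

int main() {
  // XX3-form (two sources): 8-bit extended opcode in bits 21-28 -> shift 3.
  uint32_t xvadddp_tmpl = (60u << OPCODE_SHIFT) | (96u << 3);
  // XX2-form (one source): 9-bit extended opcode in bits 21-29 -> shift 2.
  uint32_t xvabsdp_tmpl = (60u << OPCODE_SHIFT) | (473u << 2);

  assert((xvadddp_tmpl >> OPCODE_SHIFT) == 60u);  // both use major opcode 60 (VSX)
  assert((xvabsdp_tmpl >> OPCODE_SHIFT) == 60u);
  printf("XVADDDP template: 0x%08x\n", xvadddp_tmpl);  // 0xf0000300
  printf("XVABSDP template: 0x%08x\n", xvabsdp_tmpl);  // 0xf0000764
  return 0;
}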
--- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp	Wed Sep 12 12:54:16 2018 +0200
+++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp	Wed Sep 12 14:24:17 2018 +0200
@@ -769,7 +769,19 @@
 inline void Assembler::xxlor(   VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLOR_OPCODE  | vsrt(d) | vsra(a) | vsrb(b)); }
 inline void Assembler::xxlxor(  VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLXOR_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
 inline void Assembler::xxleqv(  VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLEQV_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
-inline void Assembler::mtvrd(    VectorRegister d, Register a)               { emit_int32( MTVSRD_OPCODE  | vsrt(d->to_vsr()) | ra(a)); }
+inline void Assembler::xvdivsp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVDIVSP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xvdivdp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVDIVDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xvabssp( VectorSRegister d, VectorSRegister b)                    { emit_int32( XVABSSP_OPCODE | vsrt(d) | vsrb(b)); }
+inline void Assembler::xvabsdp( VectorSRegister d, VectorSRegister b)                    { emit_int32( XVABSDP_OPCODE | vsrt(d) | vsrb(b)); }
+inline void Assembler::xvnegsp( VectorSRegister d, VectorSRegister b)                    { emit_int32( XVNEGSP_OPCODE | vsrt(d) | vsrb(b)); }
+inline void Assembler::xvnegdp( VectorSRegister d, VectorSRegister b)                    { emit_int32( XVNEGDP_OPCODE | vsrt(d) | vsrb(b)); }
+inline void Assembler::xvsqrtsp(VectorSRegister d, VectorSRegister b)                    { emit_int32( XVSQRTSP_OPCODE| vsrt(d) | vsrb(b)); }
+inline void Assembler::xvsqrtdp(VectorSRegister d, VectorSRegister b)                    { emit_int32( XVSQRTDP_OPCODE| vsrt(d) | vsrb(b)); }
+inline void Assembler::xvadddp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVADDDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xvsubdp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVSUBDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xvmulsp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMULSP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xvmuldp( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMULDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::mtvrd(   VectorRegister d, Register a)               { emit_int32( MTVSRD_OPCODE  | vsrt(d->to_vsr()) | ra(a)); }
 inline void Assembler::mfvrd(   Register        a, VectorRegister d)         { emit_int32( MFVSRD_OPCODE  | vsrt(d->to_vsr()) | ra(a)); }
 inline void Assembler::mtvrwz(  VectorRegister  d, Register a)               { emit_int32( MTVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
 inline void Assembler::mfvrwz(  Register        a, VectorRegister d)         { emit_int32( MFVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
@@ -833,6 +845,7 @@
 inline void Assembler::vaddubs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vadduws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vadduhs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vaddfp(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDFP_OPCODE  | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vsubcuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBCUW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vsubshs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vsubsbs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
@@ -840,9 +853,11 @@
 inline void Assembler::vsububm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUBM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vsubuwm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUWM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vsubuhm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUHM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsubudm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUDM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vsububs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vsubuws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vsubuhs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsubfp(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBFP_OPCODE  | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vmulesb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULESB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vmuleub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULEUB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vmulesh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULESH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
@@ -850,7 +865,9 @@
 inline void Assembler::vmulosb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOSB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vmuloub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOUB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vmulosh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOSH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmulosw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOSW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vmulouh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOUH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmuluwm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULUWM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vmhaddshs(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMHADDSHS_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
 inline void Assembler::vmhraddshs(VectorRegister d,VectorRegister a,VectorRegister b, VectorRegister c) { emit_int32( VMHRADDSHS_OPCODE| vrt(d) | vra(a) | vrb(b)| vrc(c)); }
 inline void Assembler::vmladduhm(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMLADDUHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
@@ -860,6 +877,7 @@
 inline void Assembler::vmsumshs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMSHS_OPCODE  | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
 inline void Assembler::vmsumuhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMUHM_OPCODE  | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
 inline void Assembler::vmsumuhs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMUHS_OPCODE  | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
+inline void Assembler::vmaddfp( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMADDFP_OPCODE   | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
 inline void Assembler::vsumsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUMSWS_OPCODE  | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vsum2sws(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM2SWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vsum4sbs(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM4SBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
@@ -921,6 +939,7 @@
 inline void Assembler::vsrab(   VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAB_OPCODE    | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vsraw(   VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAW_OPCODE    | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vsrah(   VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAH_OPCODE    | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vpopcntw(VectorRegister d, VectorRegister b)                   { emit_int32( VPOPCNTW_OPCODE | vrt(d) | vrb(b)); }
 inline void Assembler::mtvscr(  VectorRegister b)                                     { emit_int32( MTVSCR_OPCODE   | vrb(b)); }
 inline void Assembler::mfvscr(  VectorRegister d)                                     { emit_int32( MFVSCR_OPCODE   | vrt(d)); }
 
--- a/src/hotspot/cpu/ppc/ppc.ad	Wed Sep 12 12:54:16 2018 +0200
+++ b/src/hotspot/cpu/ppc/ppc.ad	Wed Sep 12 14:24:17 2018 +0200
@@ -2224,9 +2224,35 @@
   case Op_StrEquals:
     return SpecialStringEquals;
   case Op_StrIndexOf:
-    return SpecialStringIndexOf;
   case Op_StrIndexOfChar:
     return SpecialStringIndexOf;
+  case Op_AddVB:
+  case Op_AddVS:
+  case Op_AddVI:
+  case Op_AddVF:
+  case Op_AddVD:
+  case Op_SubVB:
+  case Op_SubVS:
+  case Op_SubVI:
+  case Op_SubVF:
+  case Op_SubVD:
+  case Op_MulVS:
+  case Op_MulVF:
+  case Op_MulVD:
+  case Op_DivVF:
+  case Op_DivVD:
+  case Op_AbsVF:
+  case Op_AbsVD:
+  case Op_NegVF:
+  case Op_NegVD:
+  case Op_SqrtVF:
+  case Op_SqrtVD:
+  case Op_AddVL:
+  case Op_SubVL:
+  case Op_MulVI:
+    return SuperwordUseVSX;
+  case Op_PopCountVI:
+    return (SuperwordUseVSX && UsePopCountInstruction);
   }
 
   return true;  // Per default match rules are supported.
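Condensed into plain logic, the gating added above reads: every new vector rule is supported only when SuperwordUseVSX is enabled (i.e. on VSX-capable POWER8-class hardware), and vector popcount additionally requires UsePopCountInstruction. A sketch of that behavior (flag names from the diff; the surrounding Matcher plumbing is omitted and the opcode enum is illustrative):

// Sketch of the new gating; not the HotSpot code itself.
enum VectorOp { AddV, SubV, MulV, DivV, AbsV, NegV, SqrtV, PopCountV };

bool vector_rule_supported(VectorOp op, bool SuperwordUseVSX, bool UsePopCountInstruction) {
  if (op == PopCountV) {
    return SuperwordUseVSX && UsePopCountInstruction;
  }
  return SuperwordUseVSX;  // all other new vector rules
}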
@@ -10007,7 +10033,7 @@
 
 // Single-precision sqrt.
 instruct sqrtF_reg(regF dst, regF src) %{
-  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+  match(Set dst (SqrtF src));
   predicate(VM_Version::has_fsqrts());
   ins_cost(DEFAULT_COST);
 
@@ -13979,6 +14005,302 @@
 %}
 
 
+//----------Vector Arithmetic Instructions--------------------------------------
+
+// Vector Addition Instructions
+
+instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (AddVB src1 src2));
+  predicate(n->as_Vector()->length() == 16);
+  format %{ "VADDUBM  $dst,$src1,$src2\t// add packed16B" %}
+  size(4);
+  ins_encode %{
+    __ vaddubm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (AddVS src1 src2));
+  predicate(n->as_Vector()->length() == 8);
+  format %{ "VADDUHM  $dst,$src1,$src2\t// add packed8S" %}
+  size(4);
+  ins_encode %{
+    __ vadduhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (AddVI src1 src2));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "VADDUWM  $dst,$src1,$src2\t// add packed4I" %}
+  size(4);
+  ins_encode %{
+    __ vadduwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (AddVF src1 src2));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "VADDFP  $dst,$src1,$src2\t// add packed4F" %}
+  size(4);
+  ins_encode %{
+    __ vaddfp($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (AddVL src1 src2));
+  predicate(n->as_Vector()->length() == 2);
+  format %{ "VADDUDM  $dst,$src1,$src2\t// add packed2L" %}
+  size(4);
+  ins_encode %{
+    __ vaddudm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (AddVD src1 src2));
+  predicate(n->as_Vector()->length() == 2);
+  format %{ "XVADDDP  $dst,$src1,$src2\t// add packed2D" %}
+  size(4);
+  ins_encode %{
+    __ xvadddp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Vector Subtraction Instructions
+
+instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (SubVB src1 src2));
+  predicate(n->as_Vector()->length() == 16);
+  format %{ "VSUBUBM  $dst,$src1,$src2\t// sub packed16B" %}
+  size(4);
+  ins_encode %{
+    __ vsububm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (SubVS src1 src2));
+  predicate(n->as_Vector()->length() == 8);
+  format %{ "VSUBUHM  $dst,$src1,$src2\t// sub packed8S" %}
+  size(4);
+  ins_encode %{
+    __ vsubuhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (SubVI src1 src2));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "VSUBUWM  $dst,$src1,$src2\t// sub packed4I" %}
+  size(4);
+  ins_encode %{
+    __ vsubuwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (SubVF src1 src2));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "VSUBFP  $dst,$src1,$src2\t// sub packed4F" %}
+  size(4);
+  ins_encode %{
+    __ vsubfp($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (SubVL src1 src2));
+  predicate(n->as_Vector()->length() == 2);
+  format %{ "VSUBUDM  $dst,$src1,$src2\t// sub packed2L" %}
+  size(4);
+  ins_encode %{
+    __ vsubudm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (SubVD src1 src2));
+  predicate(n->as_Vector()->length() == 2);
+  format %{ "XVSUBDP  $dst,$src1,$src2\t// sub packed2D" %}
+  size(4);
+  ins_encode %{
+    __ xvsubdp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Vector Multiplication Instructions
+
+instruct vmul8S_reg(vecX dst, vecX src1, vecX src2, vecX tmp) %{
+  match(Set dst (MulVS src1 src2));
+  predicate(n->as_Vector()->length() == 8);
+  effect(TEMP tmp);
+  format %{ "VSPLTISH  $tmp,0\t// mul packed8S" %}
+  format %{ "VMLADDUHM  $dst,$src1,$src2\t// mul packed8S" %}
+  size(8);
+  ins_encode %{
+    __ vspltish($tmp$$VectorSRegister->to_vr(), 0);
+    __ vmladduhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr(), $tmp$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
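The zero splat works because VMLADDUHM computes (a * b + c) modulo 2^16 in each halfword lane; with c splatted to zero this is exactly the low 16 bits of each product, which is what Java short multiplication requires. A per-lane scalar model (illustration only, not HotSpot code):

#include <cassert>
#include <cstdint>

// One lane of VMLADDUHM: multiply-low and add, modulo 2^16.
uint16_t mladduhm_lane(uint16_t a, uint16_t b, uint16_t c) {
  return (uint16_t)((uint32_t)a * b + c);
}

int main() {
  // With the addend splatted to zero, each lane is the low 16 bits of
  // the product, i.e. a packed 16-bit multiply.
  assert(mladduhm_lane(7, 9, 0) == 63);
  assert(mladduhm_lane(0xFFFF, 0xFFFF, 0) == 1);  // 65535^2 mod 65536 == 1
  return 0;
}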
+
+instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (MulVI src1 src2));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "VMULUWM  $dst,$src1,$src2\t// mul packed4I" %}
+  size(4);
+  ins_encode %{
+    __ vmuluwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (MulVF src1 src2));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "XVMULSP  $dst,$src1,$src2\t// mul packed4F" %}
+  size(4);
+  ins_encode %{
+    __ xvmulsp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (MulVD src1 src2));
+  predicate(n->as_Vector()->length() == 2);
+  format %{ "XVMULDP  $dst,$src1,$src2\t// mul packed2D" %}
+  size(4);
+  ins_encode %{
+    __ xvmuldp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Vector Division Instructions
+
+instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (DivVF src1 src2));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "XVDIVSP  $dst,$src1,$src2\t// div packed4F" %}
+  size(4);
+  ins_encode %{
+    __ xvdivsp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
+  match(Set dst (DivVD src1 src2));
+  predicate(n->as_Vector()->length() == 2);
+  format %{ "XVDIVDP  $dst,$src1,$src2\t// div packed2D" %}
+  size(4);
+  ins_encode %{
+    __ xvdivdp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Vector Absolute Instructions
+
+instruct vabs4F_reg(vecX dst, vecX src) %{
+  match(Set dst (AbsVF src));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "XVABSSP $dst,$src\t// absolute packed4F" %}
+  size(4);
+  ins_encode %{
+    __ xvabssp($dst$$VectorSRegister, $src$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vabs2D_reg(vecX dst, vecX src) %{
+  match(Set dst (AbsVD src));
+  predicate(n->as_Vector()->length() == 2);
+  format %{ "XVABSDP $dst,$src\t// absolute packed2D" %}
+  size(4);
+  ins_encode %{
+    __ xvabsdp($dst$$VectorSRegister, $src$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Vector Negate Instructions
+
+instruct vneg4F_reg(vecX dst, vecX src) %{
+  match(Set dst (NegVF src));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "XVNEGSP $dst,$src\t// negate packed4F" %}
+  size(4);
+  ins_encode %{
+    __ xvnegsp($dst$$VectorSRegister, $src$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vneg2D_reg(vecX dst, vecX src) %{
+  match(Set dst (NegVD src));
+  predicate(n->as_Vector()->length() == 2);
+  format %{ "XVNEGDP $dst,$src\t// negate packed2D" %}
+  size(4);
+  ins_encode %{
+    __ xvnegdp($dst$$VectorSRegister, $src$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Vector Square Root Instructions
+
+instruct vsqrt4F_reg(vecX dst, vecX src) %{
+  match(Set dst (SqrtVF src));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "XVSQRTSP $dst,$src\t// sqrt packed4F" %}
+  size(4);
+  ins_encode %{
+    __ xvsqrtsp($dst$$VectorSRegister, $src$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vsqrt2D_reg(vecX dst, vecX src) %{
+  match(Set dst (SqrtVD src));
+  predicate(n->as_Vector()->length() == 2);
+  format %{ "XVSQRTDP  $dst,$src\t// sqrt packed2D" %}
+  size(4);
+  ins_encode %{
+    __ xvsqrtdp($dst$$VectorSRegister, $src$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Vector Population Count Instructions
+
+instruct vpopcnt4I_reg(vecX dst, vecX src) %{
+  match(Set dst (PopCountVI src));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "VPOPCNTW $dst,$src\t// pop count packed4I" %}
+  size(4);
+  ins_encode %{
+    __ vpopcntw($dst$$VectorSRegister->to_vr(), $src$$VectorSRegister->to_vr());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+
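As a usage illustration (hypothetical loop, not part of the changeset): with these rules in place, C2's SuperWord pass can map a simple array loop onto the packed instructions above. A float-addition loop of the following shape compiles to one VADDFP (vadd4F_reg) per four elements instead of four scalar adds; the Java equivalent is the same element-wise loop.

// Hypothetical loop shape that SLP vectorizes with the new rules
// (shown in C++ for illustration).
void add_arrays(float* a, const float* b, const float* c, int n) {
  for (int i = 0; i < n; i++) {
    a[i] = b[i] + c[i];  // four lanes per VADDFP on POWER8
  }
}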
 //----------Overflow Math Instructions-----------------------------------------
 
 // Note that we have to make sure that XER.SO is reset before using overflow instructions.
--- a/src/hotspot/cpu/ppc/register_definitions_ppc.cpp	Wed Sep 12 12:54:16 2018 +0200
+++ b/src/hotspot/cpu/ppc/register_definitions_ppc.cpp	Wed Sep 12 14:24:17 2018 +0200
@@ -32,4 +32,6 @@
 
 REGISTER_DEFINITION(FloatRegister, fnoreg);
 
+REGISTER_DEFINITION(VectorRegister, vnoreg);
+
 REGISTER_DEFINITION(VectorSRegister, vsnoreg);
--- a/src/hotspot/cpu/ppc/register_ppc.cpp	Wed Sep 12 12:54:16 2018 +0200
+++ b/src/hotspot/cpu/ppc/register_ppc.cpp	Wed Sep 12 14:24:17 2018 +0200
@@ -87,6 +87,12 @@
 
 // Method to convert a VectorRegister to a Vector-Scalar Register (VectorSRegister)
 VectorSRegister VectorRegisterImpl::to_vsr() const {
-  if (this == vnoreg) { return vsnoregi; }
+  if (this == vnoreg) { return vsnoreg; }
   return as_VectorSRegister(encoding() + 32);
 }
+
+// Method to convert a VectorSRegister to a Vector Register (VectorRegister)
+VectorRegister VectorSRegisterImpl::to_vr() const {
+  if (this == vsnoreg) { return vnoreg; }
+  return as_VectorRegister(encoding() - 32);
+}
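Both conversions rely on the architectural overlap between the VMX and VSX register files: VRn is the same physical register as VSR(n+32), so the mapping is a fixed offset of 32. A standalone sketch of the invariant, with the register classes reduced to plain ints for illustration:

#include <cassert>

int vr_to_vsr(int vr)  { return vr + 32; }   // models VectorRegisterImpl::to_vsr()
int vsr_to_vr(int vsr) { return vsr - 32; }  // models VectorSRegisterImpl::to_vr()

int main() {
  for (int n = 0; n < 32; n++) {
    assert(vsr_to_vr(vr_to_vsr(n)) == n);  // round trip is the identity
  }
  return 0;
}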
--- a/src/hotspot/cpu/ppc/register_ppc.hpp	Wed Sep 12 12:54:16 2018 +0200
+++ b/src/hotspot/cpu/ppc/register_ppc.hpp	Wed Sep 12 14:24:17 2018 +0200
@@ -521,6 +521,9 @@
   bool is_valid() const { return 0 <=  value() &&  value() < number_of_registers; }
 
   const char* name() const;
+
+  // convert to VR
+  VectorRegister to_vr() const;
 };
 
 // The Vector-Scalar (VSX) registers of the POWER architecture.
@@ -593,7 +596,7 @@
 CONSTANT_REGISTER_DECLARATION(VectorSRegister, VSR63, (63));
 
 #ifndef DONT_USE_REGISTER_DEFINES
-#define vsnoregi ((VectorSRegister)(vsnoreg_VectorSRegisterEnumValue))
+#define vsnoreg ((VectorSRegister)(vsnoreg_VectorSRegisterEnumValue))
 #define VSR0    ((VectorSRegister)(   VSR0_VectorSRegisterEnumValue))
 #define VSR1    ((VectorSRegister)(   VSR1_VectorSRegisterEnumValue))
 #define VSR2    ((VectorSRegister)(   VSR2_VectorSRegisterEnumValue))