hotspot/src/cpu/x86/vm/x86.ad
changeset 34162 16b54851eaf6
parent 33469 30f4811eded0
child 35581 dd47cf4734f2
34159:f401f5b4327e 34162:16b54851eaf6
  1714   }
  1714   }
  1715 
  1715 
  1716   return ret_value;  // By default, match rules are supported.
  1716   return ret_value;  // By default, match rules are supported.
  1717 }
  1717 }
  1718 
  1718 
       
  1719 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
       
  1720   // identify extra cases that we might want to provide match rules for
       
  1721   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
       
  1722   bool ret_value = match_rule_supported(opcode);
       
  1723   if (ret_value) {
       
  1724     switch (opcode) {
       
  1725       case Op_AddVB:
       
  1726       case Op_SubVB:
       
  1727         if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
       
  1728           ret_value = false;
       
  1729         break;
       
  1730       case Op_URShiftVS:
       
  1731       case Op_RShiftVS:
       
  1732       case Op_LShiftVS:
       
  1733       case Op_MulVS:
       
  1734       case Op_AddVS:
       
  1735       case Op_SubVS:
       
  1736         if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
       
  1737           ret_value = false;
       
  1738         break;
       
  1739       case Op_CMoveVD:
       
  1740         if (vlen != 4)
       
  1741           ret_value  = false;
       
  1742         break;
       
  1743     }
       
  1744   }
       
  1745 
       
  1746   return ret_value;  // By default, match rules are supported.
       
  1747 }
       
  1748 
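
The new match_rule_supported_vector hook above gates vector match rules on both opcode and lane count. A minimal standalone sketch of that gating, using simplified enum tags rather than HotSpot's real Op_* constants:

enum VecOp { AddVB, SubVB, MulVS, AddVS, SubVS, LShiftVS, RShiftVS, URShiftVS, CMoveVD, OtherOp };

bool vector_rule_supported(VecOp op, int vlen, bool has_avx512bw) {
  switch (op) {
    case AddVB: case SubVB:
      return vlen != 64 || has_avx512bw;   // 64 byte lanes = 512 bits: needs AVX-512BW
    case MulVS: case AddVS: case SubVS:
    case LShiftVS: case RShiftVS: case URShiftVS:
      return vlen != 32 || has_avx512bw;   // 32 short lanes = 512 bits: needs AVX-512BW
    case CMoveVD:
      return vlen == 4;                    // only the 4-lane double CMove is matched
    default:
      return true;                         // by default, match rules are supported
  }
}
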
  1719 const int Matcher::float_pressure(int default_pressure_threshold) {
  1749 const int Matcher::float_pressure(int default_pressure_threshold) {
  1720   int float_pressure_threshold = default_pressure_threshold;
  1750   int float_pressure_threshold = default_pressure_threshold;
  1721 #ifdef _LP64
  1751 #ifdef _LP64
  1722   if (UseAVX > 2) {
  1752   if (UseAVX > 2) {
  1723     // Increase pressure threshold on machines with AVX3 which have
  1753     // Increase pressure threshold on machines with AVX3 which have
  1757   case T_CHAR:
  1787   case T_CHAR:
  1758     if (size < 4) return 0;
  1788     if (size < 4) return 0;
  1759     break;
  1789     break;
  1760   case T_BYTE:
  1790   case T_BYTE:
  1761     if (size < 4) return 0;
  1791     if (size < 4) return 0;
  1762     if ((size > 32) && !VM_Version::supports_avx512bw()) return 0;
       
  1763     break;
  1792     break;
  1764   case T_SHORT:
  1793   case T_SHORT:
  1765     if (size < 4) return 0;
  1794     if (size < 4) return 0;
  1766     if ((size > 16) && !VM_Version::supports_avx512bw()) return 0;
       
  1767     break;
  1795     break;
  1768   default:
  1796   default:
  1769     ShouldNotReachHere();
  1797     ShouldNotReachHere();
  1770   }
  1798   }
  1771   return size;
  1799   return size;
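
The hunk above applies the same AVX-512BW gate to the maximum lane count per element type: sub-word (byte/short) vectors wider than 256 bits need the BW extension. A sketch of the clamp, assuming `lanes` is the raw lane count for the widest available vector:

int clamp_vector_lanes(char type_tag, int lanes, bool has_avx512bw) {
  switch (type_tag) {
    case 'B':                                      // byte
      if (lanes < 4) return 0;                     // too narrow to vectorize at all
      if (lanes > 32 && !has_avx512bw) return 0;   // >256-bit byte ops need AVX-512BW
      break;
    case 'S':                                      // short
      if (lanes < 4) return 0;
      if (lanes > 16 && !has_avx512bw) return 0;   // >256-bit short ops need AVX-512BW
      break;
  }
  return lanes;
}
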
  1965 #endif
  1993 #endif
  1966   }
  1994   }
  1967   bool is_single_byte = false;
  1995   bool is_single_byte = false;
  1968   int vec_len = 0;
  1996   int vec_len = 0;
  1969   if ((UseAVX > 2) && (stack_offset != 0)) {
  1997   if ((UseAVX > 2) && (stack_offset != 0)) {
       
  1998     int tuple_type = Assembler::EVEX_FVM;
       
  1999     int input_size = Assembler::EVEX_32bit;
  1970     switch (ireg) {
  2000     switch (ireg) {
  1971 	case Op_VecS:
  2001     case Op_VecS:
       
  2002       tuple_type = Assembler::EVEX_T1S;
       
  2003       break;
  1972     case Op_VecD:
  2004     case Op_VecD:
       
  2005       tuple_type = Assembler::EVEX_T1S;
       
  2006       input_size = Assembler::EVEX_64bit;
       
  2007       break;
  1973     case Op_VecX:
  2008     case Op_VecX:
  1974 	  break;
  2009       break;
  1975 	case Op_VecY:
  2010     case Op_VecY:
  1976 	  vec_len = 1;
  2011       vec_len = 1;
  1977 	  break;
  2012       break;
  1978     case Op_VecZ:
  2013     case Op_VecZ:
  1979 	  vec_len = 2;
  2014       vec_len = 2;
  1980 	  break;
  2015       break;
  1981     }
  2016     }
  1982     is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, Assembler::EVEX_FVM, Assembler::EVEX_32bit, 0);
  2017     is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  1983   }
  2018   }
  1984   int offset_size = 0;
  2019   int offset_size = 0;
  1985   int size = 5;
  2020   int size = 5;
  1986   if (UseAVX > 2 ) {
  2021   if (UseAVX > 2 ) {
  1987     if ((VM_Version::supports_avx512vl() == false) && (vec_len == 2)) { 
  2022     if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
  1988       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  2023       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  1989       size += 2; // Need an additional two bytes for EVEX encoding
  2024       size += 2; // Need an additional two bytes for EVEX encoding
  1990     } else if ((VM_Version::supports_avx512vl() == false) && (vec_len < 2)) { 
  2025     } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
  1991       offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  2026       offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  1992     } else {
  2027     } else {
  1993       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  2028       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  1994       size += 2; // Need an additional two bytes for EVEX encoding
  2029       size += 2; // Need an additional two bytes for EVEX encoding
  1995     }
  2030     }
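
The tuple_type/input_size values introduced above feed EVEX's disp8*N displacement compression: an 8-bit displacement is scaled by a factor N derived from the tuple type (the full vector width for FVM tuples, the 32- or 64-bit element size for T1S). A sketch of the property query_compressed_disp_byte evaluates:

bool fits_evex_disp8(int offset, int N) {
  if (offset % N != 0) return false;       // must be a multiple of the scale factor
  int scaled = offset / N;
  return scaled >= -128 && scaled <= 127;  // scaled value must fit a signed byte
}

For example, a 64-byte ZMM spill at stack offset 192 gives fits_evex_disp8(192, 64) == true, so the instruction encodes with a 1-byte displacement instead of 4, which is the size difference this estimator accounts for.
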
  2709   %}
  2744   %}
  2710   ins_pipe(pipe_slow);
  2745   ins_pipe(pipe_slow);
  2711 %}
  2746 %}
  2712 
  2747 
  2713 instruct absF_reg_reg(regF dst, regF src) %{
  2748 instruct absF_reg_reg(regF dst, regF src) %{
  2714   predicate(UseAVX > 0);
  2749   predicate(VM_Version::supports_avxonly());
  2715   match(Set dst (AbsF src));
  2750   match(Set dst (AbsF src));
  2716   ins_cost(150);
  2751   ins_cost(150);
  2717   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  2752   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  2718   ins_encode %{
  2753   ins_encode %{
  2719     int vector_len = 0;
  2754     int vector_len = 0;
  2720     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
  2755     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
  2721               ExternalAddress(float_signmask()), vector_len);
  2756               ExternalAddress(float_signmask()), vector_len);
  2722   %}
  2757   %}
  2723   ins_pipe(pipe_slow);
  2758   ins_pipe(pipe_slow);
  2724 %}
  2759 %}
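
This and several later hunks narrow `UseAVX > 0` (or `UseAVX > 0 && UseAVX < 3`) predicates to VM_Version::supports_avxonly(), so the plain VEX rules no longer fire on EVEX-capable machines. Presumed meaning of the new predicate (a sketch, not the actual vm_version_x86 definition):

struct CpuFeatures { bool avx, avx2, evex; };

// AVX or AVX2 is available, but EVEX (AVX-512) encoding is not.
bool supports_avxonly(const CpuFeatures& f) {
  return (f.avx || f.avx2) && !f.evex;
}
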
       
  2760 
       
  2761 #ifdef _LP64
       
  2762 instruct absF_reg_reg_evex(regF dst, regF src) %{
       
  2763   predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
       
  2764   match(Set dst (AbsF src));
       
  2765   ins_cost(150);
       
  2766   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
       
  2767   ins_encode %{
       
  2768     int vector_len = 0;
       
  2769     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
       
  2770               ExternalAddress(float_signmask()), vector_len);
       
  2771   %}
       
  2772   ins_pipe(pipe_slow);
       
  2773 %}
       
  2774 
       
  2775 instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
       
  2776   predicate(VM_Version::supports_avx512novl());
       
  2777   match(Set dst (AbsF src1));
       
  2778   effect(TEMP src2);
       
  2779   ins_cost(150);
       
  2780   format %{ "vabsss  $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
       
  2781   ins_encode %{
       
  2782     int vector_len = 0;
       
  2783     __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
       
  2784               ExternalAddress(float_signmask()), vector_len);
       
  2785   %}
       
  2786   ins_pipe(pipe_slow);
       
  2787 %}
       
  2788 #else // _LP64
       
  2789 instruct absF_reg_reg_evex(regF dst, regF src) %{
       
  2790   predicate(UseAVX > 2);
       
  2791   match(Set dst (AbsF src));
       
  2792   ins_cost(150);
       
  2793   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
       
  2794   ins_encode %{
       
  2795     int vector_len = 0;
       
  2796     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
       
  2797               ExternalAddress(float_signmask()), vector_len);
       
  2798   %}
       
  2799   ins_pipe(pipe_slow);
       
  2800 %}
       
  2801 #endif
  2725 
  2802 
  2726 instruct absD_reg(regD dst) %{
  2803 instruct absD_reg(regD dst) %{
  2727   predicate((UseSSE>=2) && (UseAVX == 0));
  2804   predicate((UseSSE>=2) && (UseAVX == 0));
  2728   match(Set dst (AbsD dst));
  2805   match(Set dst (AbsD dst));
  2729   ins_cost(150);
  2806   ins_cost(150);
  2734   %}
  2811   %}
  2735   ins_pipe(pipe_slow);
  2812   ins_pipe(pipe_slow);
  2736 %}
  2813 %}
  2737 
  2814 
  2738 instruct absD_reg_reg(regD dst, regD src) %{
  2815 instruct absD_reg_reg(regD dst, regD src) %{
  2739   predicate(UseAVX > 0);
  2816   predicate(VM_Version::supports_avxonly());
  2740   match(Set dst (AbsD src));
  2817   match(Set dst (AbsD src));
  2741   ins_cost(150);
  2818   ins_cost(150);
  2742   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
  2819   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
  2743             "# abs double by sign masking" %}
  2820             "# abs double by sign masking" %}
  2744   ins_encode %{
  2821   ins_encode %{
  2746     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
  2823     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
  2747               ExternalAddress(double_signmask()), vector_len);
  2824               ExternalAddress(double_signmask()), vector_len);
  2748   %}
  2825   %}
  2749   ins_pipe(pipe_slow);
  2826   ins_pipe(pipe_slow);
  2750 %}
  2827 %}
       
  2828 
       
  2829 #ifdef _LP64
       
  2830 instruct absD_reg_reg_evex(regD dst, regD src) %{
       
  2831   predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
       
  2832   match(Set dst (AbsD src));
       
  2833   ins_cost(150);
       
  2834   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
       
  2835             "# abs double by sign masking" %}
       
  2836   ins_encode %{
       
  2837     int vector_len = 0;
       
  2838     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
       
  2839               ExternalAddress(double_signmask()), vector_len);
       
  2840   %}
       
  2841   ins_pipe(pipe_slow);
       
  2842 %}
       
  2843 
       
  2844 instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
       
  2845   predicate(VM_Version::supports_avx512novl());
       
  2846   match(Set dst (AbsD src1));
       
  2847   effect(TEMP src2);
       
  2848   ins_cost(150);
       
  2849   format %{ "vabssd  $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %}
       
  2850   ins_encode %{
       
  2851     int vector_len = 0;
       
  2852     __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
       
  2853               ExternalAddress(double_signmask()), vector_len);
       
  2854   %}
       
  2855   ins_pipe(pipe_slow);
       
  2856 %}
       
  2857 #else // _LP64
       
  2858 instruct absD_reg_reg_evex(regD dst, regD src) %{
       
  2859   predicate(UseAVX > 2);
       
  2860   match(Set dst (AbsD src));
       
  2861   ins_cost(150);
       
  2862   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
       
  2863             "# abs double by sign masking" %}
       
  2864   ins_encode %{
       
  2865     int vector_len = 0;
       
  2866     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
       
  2867               ExternalAddress(double_signmask()), vector_len);
       
  2868   %}
       
  2869   ins_pipe(pipe_slow);
       
  2870 %}
       
  2871 #endif
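
All of the abs rules above, VEX and EVEX variants alike, implement absolute value by ANDing out the IEEE-754 sign bit. The scalar equivalent of the vandps/vandpd masking:

#include <cstdint>
#include <cstring>

float fabs_by_mask(float x) {
  uint32_t b;
  std::memcpy(&b, &x, sizeof b);
  b &= 0x7fffffffu;                      // the [0x7fffffff] operand of vandps
  std::memcpy(&x, &b, sizeof x);
  return x;
}

double dabs_by_mask(double x) {
  uint64_t b;
  std::memcpy(&b, &x, sizeof b);
  b &= 0x7fffffffffffffffull;            // the [0x7fffffffffffffff] operand of vandpd
  std::memcpy(&x, &b, sizeof x);
  return x;
}
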
  2751 
  2872 
  2752 instruct negF_reg(regF dst) %{
  2873 instruct negF_reg(regF dst) %{
  2753   predicate((UseSSE>=1) && (UseAVX == 0));
  2874   predicate((UseSSE>=1) && (UseAVX == 0));
  2754   match(Set dst (NegF dst));
  2875   match(Set dst (NegF dst));
  2755   ins_cost(150);
  2876   ins_cost(150);
  4552   %}
  4673   %}
  4553   ins_pipe( pipe_slow );
  4674   ins_pipe( pipe_slow );
  4554 %}
  4675 %}
  4555 
  4676 
  4556 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  4677 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  4557   predicate(UseAVX > 0 && UseAVX < 3);
  4678   predicate(VM_Version::supports_avxonly());
  4558   match(Set dst (AddReductionVI src1 src2));
  4679   match(Set dst (AddReductionVI src1 src2));
  4559   effect(TEMP tmp, TEMP tmp2);
  4680   effect(TEMP tmp, TEMP tmp2);
  4560   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
  4681   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
  4561             "movd     $tmp2,$src1\n\t"
  4682             "movd     $tmp2,$src1\n\t"
  4562             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
  4683             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
  4592 %}
  4713 %}
  4593 
  4714 
  4594 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  4715 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  4595   predicate(UseSSE > 2 && UseAVX == 0);
  4716   predicate(UseSSE > 2 && UseAVX == 0);
  4596   match(Set dst (AddReductionVI src1 src2));
  4717   match(Set dst (AddReductionVI src1 src2));
  4597   effect(TEMP tmp2, TEMP tmp);
  4718   effect(TEMP tmp, TEMP tmp2);
  4598   format %{ "movdqu  $tmp2,$src2\n\t"
  4719   format %{ "movdqu  $tmp,$src2\n\t"
  4599             "phaddd  $tmp2,$tmp2\n\t"
  4720             "phaddd  $tmp,$tmp\n\t"
  4600             "phaddd  $tmp2,$tmp2\n\t"
  4721             "phaddd  $tmp,$tmp\n\t"
  4601             "movd    $tmp,$src1\n\t"
  4722             "movd    $tmp2,$src1\n\t"
  4602             "paddd   $tmp,$tmp2\n\t"
  4723             "paddd   $tmp2,$tmp\n\t"
  4603             "movd    $dst,$tmp\t! add reduction4I" %}
  4724             "movd    $dst,$tmp2\t! add reduction4I" %}
  4604   ins_encode %{
  4725   ins_encode %{
  4605     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
  4726     __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
  4606     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
  4727     __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
  4607     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
  4728     __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
  4608     __ movdl($tmp$$XMMRegister, $src1$$Register);
  4729     __ movdl($tmp2$$XMMRegister, $src1$$Register);
  4609     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
  4730     __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
  4610     __ movdl($dst$$Register, $tmp$$XMMRegister);
  4731     __ movdl($dst$$Register, $tmp2$$XMMRegister);
  4611   %}
  4732   %}
  4612   ins_pipe( pipe_slow );
  4733   ins_pipe( pipe_slow );
  4613 %}
  4734 %}
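
The rewrite above only swaps the roles of $tmp and $tmp2; the computed value is unchanged. In scalar terms the 4-lane integer reduction is simply the following (integer addition is associative, so the phaddd pairing order is immaterial):

int add_reduction_4i(int src1, const int v[4]) {
  return src1 + v[0] + v[1] + v[2] + v[3];
}
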
  4614 
  4735 
  4615 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  4736 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  4616   predicate(UseAVX > 0 && UseAVX < 3);
  4737   predicate(VM_Version::supports_avxonly());
  4617   match(Set dst (AddReductionVI src1 src2));
  4738   match(Set dst (AddReductionVI src1 src2));
  4618   effect(TEMP tmp, TEMP tmp2);
  4739   effect(TEMP tmp, TEMP tmp2);
  4619   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
  4740   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
  4620             "vphaddd  $tmp,$tmp,$tmp2\n\t"
  4741             "vphaddd  $tmp,$tmp,$tmp\n\t"
  4621             "movd     $tmp2,$src1\n\t"
  4742             "movd     $tmp2,$src1\n\t"
  4622             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
  4743             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
  4623             "movd     $dst,$tmp2\t! add reduction4I" %}
  4744             "movd     $dst,$tmp2\t! add reduction4I" %}
  4624   ins_encode %{
  4745   ins_encode %{
  4625     int vector_len = 0;
  4746     int vector_len = 0;
  4626     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
  4747     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
  4627     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
  4748     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
  4628     __ movdl($tmp2$$XMMRegister, $src1$$Register);
  4749     __ movdl($tmp2$$XMMRegister, $src1$$Register);
  4629     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
  4750     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
  4630     __ movdl($dst$$Register, $tmp2$$XMMRegister);
  4751     __ movdl($dst$$Register, $tmp2$$XMMRegister);
  4631   %}
  4752   %}
  4632   ins_pipe( pipe_slow );
  4753   ins_pipe( pipe_slow );
  4655   %}
  4776   %}
  4656   ins_pipe( pipe_slow );
  4777   ins_pipe( pipe_slow );
  4657 %}
  4778 %}
  4658 
  4779 
  4659 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  4780 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  4660   predicate(UseAVX > 0 && UseAVX < 3);
  4781   predicate(VM_Version::supports_avxonly());
  4661   match(Set dst (AddReductionVI src1 src2));
  4782   match(Set dst (AddReductionVI src1 src2));
  4662   effect(TEMP tmp, TEMP tmp2);
  4783   effect(TEMP tmp, TEMP tmp2);
  4663   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
  4784   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
  4664             "vphaddd  $tmp,$tmp,$tmp2\n\t"
  4785             "vphaddd  $tmp,$tmp,$tmp2\n\t"
  4665             "vextracti128  $tmp2,$tmp\n\t"
  4786             "vextracti128  $tmp2,$tmp\n\t"
  4710 
  4831 
  4711 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  4832 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  4712   predicate(UseAVX > 2);
  4833   predicate(UseAVX > 2);
  4713   match(Set dst (AddReductionVI src1 src2));
  4834   match(Set dst (AddReductionVI src1 src2));
  4714   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  4835   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  4715   format %{ "vextracti64x4  $tmp3,$src2\n\t"
  4836   format %{ "vextracti64x4  $tmp3,$src2,0x1\n\t"
  4716             "vpaddd  $tmp3,$tmp3,$src2\n\t"
  4837             "vpaddd  $tmp3,$tmp3,$src2\n\t"
  4717             "vextracti128   $tmp,$tmp3\n\t"
  4838             "vextracti128   $tmp,$tmp3\n\t"
  4718             "vpaddd  $tmp,$tmp,$tmp3\n\t"
  4839             "vpaddd  $tmp,$tmp,$tmp3\n\t"
  4719             "pshufd  $tmp2,$tmp,0xE\n\t"
  4840             "pshufd  $tmp2,$tmp,0xE\n\t"
  4720             "vpaddd  $tmp,$tmp,$tmp2\n\t"
  4841             "vpaddd  $tmp,$tmp,$tmp2\n\t"
  4722             "vpaddd  $tmp,$tmp,$tmp2\n\t"
  4843             "vpaddd  $tmp,$tmp,$tmp2\n\t"
  4723             "movd    $tmp2,$src1\n\t"
  4844             "movd    $tmp2,$src1\n\t"
  4724             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
  4845             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
  4725             "movd    $dst,$tmp2\t! mul reduction16I" %}
  4846             "movd    $dst,$tmp2\t! mul reduction16I" %}
  4726   ins_encode %{
  4847   ins_encode %{
  4727     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
  4848     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
  4728     __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
  4849     __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
  4729     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
  4850     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
  4730     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
  4851     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
  4731     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
  4852     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
  4732     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
  4853     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
  4761 
  4882 
  4762 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  4883 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  4763   predicate(UseAVX > 2);
  4884   predicate(UseAVX > 2);
  4764   match(Set dst (AddReductionVL src1 src2));
  4885   match(Set dst (AddReductionVL src1 src2));
  4765   effect(TEMP tmp, TEMP tmp2);
  4886   effect(TEMP tmp, TEMP tmp2);
  4766   format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
  4887   format %{ "vextracti128  $tmp,$src2\n\t"
  4767             "vpaddq  $tmp2,$tmp,$src2\n\t"
  4888             "vpaddq  $tmp2,$tmp,$src2\n\t"
  4768             "pshufd  $tmp,$tmp2,0xE\n\t"
  4889             "pshufd  $tmp,$tmp2,0xE\n\t"
  4769             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
  4890             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
  4770             "movdq   $tmp,$src1\n\t"
  4891             "movdq   $tmp,$src1\n\t"
  4771             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
  4892             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
  4772             "movdq   $dst,$tmp2\t! add reduction4L" %}
  4893             "movdq   $dst,$tmp2\t! add reduction4L" %}
  4773   ins_encode %{
  4894   ins_encode %{
  4774     __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
  4895     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
  4775     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
  4896     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
  4776     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  4897     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  4777     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  4898     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  4778     __ movdq($tmp$$XMMRegister, $src1$$Register);
  4899     __ movdq($tmp$$XMMRegister, $src1$$Register);
  4779     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  4900     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  4784 
  4905 
  4785 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  4906 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  4786   predicate(UseAVX > 2);
  4907   predicate(UseAVX > 2);
  4787   match(Set dst (AddReductionVL src1 src2));
  4908   match(Set dst (AddReductionVL src1 src2));
  4788   effect(TEMP tmp, TEMP tmp2);
  4909   effect(TEMP tmp, TEMP tmp2);
  4789   format %{ "vextracti64x4  $tmp2,$src2\n\t"
  4910   format %{ "vextracti64x4  $tmp2,$src2,0x1\n\t"
  4790             "vpaddq  $tmp2,$tmp2,$src2\n\t"
  4911             "vpaddq  $tmp2,$tmp2,$src2\n\t"
  4791             "vextracti128   $tmp,$tmp2\n\t"
  4912             "vextracti128   $tmp,$tmp2\n\t"
  4792             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
  4913             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
  4793             "pshufd  $tmp,$tmp2,0xE\n\t"
  4914             "pshufd  $tmp,$tmp2,0xE\n\t"
  4794             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
  4915             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
  4795             "movdq   $tmp,$src1\n\t"
  4916             "movdq   $tmp,$src1\n\t"
  4796             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
  4917             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
  4797             "movdq   $dst,$tmp2\t! add reduction8L" %}
  4918             "movdq   $dst,$tmp2\t! add reduction8L" %}
  4798   ins_encode %{
  4919   ins_encode %{
  4799     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
  4920     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
  4800     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
  4921     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
  4801     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
  4922     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
  4802     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  4923     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  4803     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  4924     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  4804     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  4925     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  4808   %}
  4929   %}
  4809   ins_pipe( pipe_slow );
  4930   ins_pipe( pipe_slow );
  4810 %}
  4931 %}
  4811 #endif
  4932 #endif
  4812 
  4933 
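
The 512-bit reductions above all follow the same extract-and-fold shape: add the upper half of the vector onto the lower half (vextracti64x4, then vextracti128), repeat until one lane remains, then fold in the scalar input. A sketch for the 8-lane long case:

long add_reduction_8l(long src1, const long v[8]) {
  long t[8];
  for (int i = 0; i < 8; i++) t[i] = v[i];
  for (int half = 4; half >= 1; half /= 2)   // 512 -> 256 -> 128 -> 64 bits
    for (int i = 0; i < half; i++)
      t[i] += t[i + half];                   // vpaddq of high half onto low half
  return src1 + t[0];
}
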
  4813 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  4934 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  4814   predicate(UseSSE >= 1 && UseAVX == 0);
  4935   predicate(UseSSE >= 1 && UseAVX == 0);
  4815   match(Set dst (AddReductionVF src1 src2));
  4936   match(Set dst (AddReductionVF dst src2));
  4816   effect(TEMP tmp, TEMP tmp2);
  4937   effect(TEMP dst, TEMP tmp);
  4817   format %{ "movdqu  $tmp,$src1\n\t"
  4938   format %{ "addss   $dst,$src2\n\t"
  4818             "addss   $tmp,$src2\n\t"
  4939             "pshufd  $tmp,$src2,0x01\n\t"
  4819             "pshufd  $tmp2,$src2,0x01\n\t"
  4940             "addss   $dst,$tmp\t! add reduction2F" %}
  4820             "addss   $tmp,$tmp2\n\t"
  4941   ins_encode %{
  4821             "movdqu  $dst,$tmp\t! add reduction2F" %}
  4942     __ addss($dst$$XMMRegister, $src2$$XMMRegister);
  4822   ins_encode %{
  4943     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  4823     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
  4944     __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  4824     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
  4945   %}
  4825     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
  4946   ins_pipe( pipe_slow );
  4826     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
  4947 %}
  4827     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  4948 
  4828   %}
  4949 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  4829   ins_pipe( pipe_slow );
       
  4830 %}
       
  4831 
       
  4832 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
       
  4833   predicate(UseAVX > 0);
  4950   predicate(UseAVX > 0);
  4834   match(Set dst (AddReductionVF src1 src2));
  4951   match(Set dst (AddReductionVF dst src2));
  4835   effect(TEMP tmp2, TEMP tmp);
  4952   effect(TEMP dst, TEMP tmp);
  4836   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
  4953   format %{ "vaddss  $dst,$dst,$src2\n\t"
  4837             "pshufd  $tmp,$src2,0x01\n\t"
  4954             "pshufd  $tmp,$src2,0x01\n\t"
  4838             "vaddss  $dst,$tmp2,$tmp\t! add reduction2F" %}
  4955             "vaddss  $dst,$dst,$tmp\t! add reduction2F" %}
  4839   ins_encode %{
  4956   ins_encode %{
  4840     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  4957     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  4841     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  4958     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  4842     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  4959     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4843   %}
  4960   %}
  4844   ins_pipe( pipe_slow );
  4961   ins_pipe( pipe_slow );
  4845 %}
  4962 %}
  4846 
  4963 
  4847 instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  4964 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  4848   predicate(UseSSE >= 1 && UseAVX == 0);
  4965   predicate(UseSSE >= 1 && UseAVX == 0);
  4849   match(Set dst (AddReductionVF src1 src2));
  4966   match(Set dst (AddReductionVF dst src2));
  4850   effect(TEMP tmp, TEMP tmp2);
  4967   effect(TEMP dst, TEMP tmp);
  4851   format %{ "movdqu  $tmp,$src1\n\t"
  4968   format %{ "addss   $dst,$src2\n\t"
  4852             "addss   $tmp,$src2\n\t"
  4969             "pshufd  $tmp,$src2,0x01\n\t"
  4853             "pshufd  $tmp2,$src2,0x01\n\t"
  4970             "addss   $dst,$tmp\n\t"
  4854             "addss   $tmp,$tmp2\n\t"
  4971             "pshufd  $tmp,$src2,0x02\n\t"
  4855             "pshufd  $tmp2,$src2,0x02\n\t"
  4972             "addss   $dst,$tmp\n\t"
  4856             "addss   $tmp,$tmp2\n\t"
  4973             "pshufd  $tmp,$src2,0x03\n\t"
  4857             "pshufd  $tmp2,$src2,0x03\n\t"
  4974             "addss   $dst,$tmp\t! add reduction4F" %}
  4858             "addss   $tmp,$tmp2\n\t"
  4975   ins_encode %{
  4859             "movdqu  $dst,$tmp\t! add reduction4F" %}
  4976     __ addss($dst$$XMMRegister, $src2$$XMMRegister);
  4860   ins_encode %{
  4977     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  4861     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
  4978     __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  4862     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
  4979     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  4863     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
  4980     __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  4864     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
  4981     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  4865     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
  4982     __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  4866     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
  4983   %}
  4867     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
  4984   ins_pipe( pipe_slow );
  4868     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
  4985 %}
  4869     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  4986 
  4870   %}
  4987 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  4871   ins_pipe( pipe_slow );
       
  4872 %}
       
  4873 
       
  4874 instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
       
  4875   predicate(UseAVX > 0);
  4988   predicate(UseAVX > 0);
  4876   match(Set dst (AddReductionVF src1 src2));
  4989   match(Set dst (AddReductionVF dst src2));
  4877   effect(TEMP tmp, TEMP tmp2);
  4990   effect(TEMP tmp, TEMP dst);
  4878   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
  4991   format %{ "vaddss  $dst,dst,$src2\n\t"
  4879             "pshufd  $tmp,$src2,0x01\n\t"
  4992             "pshufd  $tmp,$src2,0x01\n\t"
  4880             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  4993             "vaddss  $dst,$dst,$tmp\n\t"
  4881             "pshufd  $tmp,$src2,0x02\n\t"
  4994             "pshufd  $tmp,$src2,0x02\n\t"
  4882             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  4995             "vaddss  $dst,$dst,$tmp\n\t"
  4883             "pshufd  $tmp,$src2,0x03\n\t"
  4996             "pshufd  $tmp,$src2,0x03\n\t"
  4884             "vaddss  $dst,$tmp2,$tmp\t! add reduction4F" %}
  4997             "vaddss  $dst,$dst,$tmp\t! add reduction4F" %}
  4885   ins_encode %{
  4998   ins_encode %{
  4886     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  4999     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  4887     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5000     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  4888     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5001     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4889     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  5002     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  4890     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5003     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4891     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  5004     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  4892     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5005     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4893   %}
  5006   %}
  4894   ins_pipe( pipe_slow );
  5007   ins_pipe( pipe_slow );
  4895 %}
  5008 %}
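
The floating-point reductions are reworked more substantially: `match(Set dst (AddReductionVF src1 src2))` becomes `match(Set dst (AddReductionVF dst src2))`, so dst serves as both the incoming accumulator and the result, one temp register is freed, and the lanes are still summed strictly in order, which matters because FP addition is not associative. Scalar sketch of the 4-lane case:

float add_reduction_4f(float dst, const float v[4]) {
  for (int i = 0; i < 4; i++)   // addss, then pshufd+addss per remaining lane
    dst += v[i];                // strictly ordered: (((dst+v0)+v1)+v2)+v3
  return dst;
}
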
  4896 
  5009 
  4897 instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
  5010 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  4898   predicate(UseAVX > 0);
  5011   predicate(UseAVX > 0);
  4899   match(Set dst (AddReductionVF src1 src2));
  5012   match(Set dst (AddReductionVF dst src2));
  4900   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  5013   effect(TEMP tmp, TEMP dst, TEMP tmp2);
  4901   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
  5014   format %{ "vaddss  $dst,$dst,$src2\n\t"
  4902             "pshufd  $tmp,$src2,0x01\n\t"
  5015             "pshufd  $tmp,$src2,0x01\n\t"
  4903             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5016             "vaddss  $dst,$dst,$tmp\n\t"
  4904             "pshufd  $tmp,$src2,0x02\n\t"
  5017             "pshufd  $tmp,$src2,0x02\n\t"
  4905             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5018             "vaddss  $dst,$dst,$tmp\n\t"
  4906             "pshufd  $tmp,$src2,0x03\n\t"
  5019             "pshufd  $tmp,$src2,0x03\n\t"
  4907             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5020             "vaddss  $dst,$dst,$tmp\n\t"
  4908             "vextractf128  $tmp3,$src2\n\t"
  5021             "vextractf128  $tmp2,$src2\n\t"
  4909             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
  5022             "vaddss  $dst,$dst,$tmp2\n\t"
  4910             "pshufd  $tmp,$tmp3,0x01\n\t"
  5023             "pshufd  $tmp,$tmp2,0x01\n\t"
  4911             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5024             "vaddss  $dst,$dst,$tmp\n\t"
  4912             "pshufd  $tmp,$tmp3,0x02\n\t"
  5025             "pshufd  $tmp,$tmp2,0x02\n\t"
  4913             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5026             "vaddss  $dst,$dst,$tmp\n\t"
  4914             "pshufd  $tmp,$tmp3,0x03\n\t"
  5027             "pshufd  $tmp,$tmp2,0x03\n\t"
  4915             "vaddss  $dst,$tmp2,$tmp\t! add reduction8F" %}
  5028             "vaddss  $dst,$dst,$tmp\t! add reduction8F" %}
  4916   ins_encode %{
  5029   ins_encode %{
  4917     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  5030     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  4918     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5031     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  4919     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5032     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4920     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  5033     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  4921     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5034     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4922     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  5035     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  4923     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5036     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4924     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
  5037     __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
  4925     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5038     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  4926     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
  5039     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
  4927     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5040     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4928     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
  5041     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
  4929     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5042     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4930     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
  5043     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
  4931     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5044     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4932   %}
  5045   %}
  4933   ins_pipe( pipe_slow );
  5046   ins_pipe( pipe_slow );
  4934 %}
  5047 %}
  4935 
  5048 
  4936 instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  5049 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  4937   predicate(UseAVX > 2);
  5050   predicate(UseAVX > 2);
  4938   match(Set dst (AddReductionVF src1 src2));
  5051   match(Set dst (AddReductionVF dst src2));
  4939   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  5052   effect(TEMP tmp, TEMP dst, TEMP tmp2);
  4940   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
  5053   format %{ "vaddss  $dst,$dst,$src2\n\t"
  4941             "pshufd  $tmp,$src2,0x01\n\t"
  5054             "pshufd  $tmp,$src2,0x01\n\t"
  4942             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5055             "vaddss  $dst,$dst,$tmp\n\t"
  4943             "pshufd  $tmp,$src2,0x02\n\t"
  5056             "pshufd  $tmp,$src2,0x02\n\t"
  4944             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5057             "vaddss  $dst,$dst,$tmp\n\t"
  4945             "pshufd  $tmp,$src2,0x03\n\t"
  5058             "pshufd  $tmp,$src2,0x03\n\t"
  4946             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5059             "vaddss  $dst,$dst,$tmp\n\t"
  4947             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
  5060             "vextractf32x4  $tmp2,$src2, 0x1\n\t"
  4948             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
  5061             "vaddss  $dst,$dst,$tmp2\n\t"
  4949             "pshufd  $tmp,$tmp3,0x01\n\t"
  5062             "pshufd  $tmp,$tmp2,0x01\n\t"
  4950             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5063             "vaddss  $dst,$dst,$tmp\n\t"
  4951             "pshufd  $tmp,$tmp3,0x02\n\t"
  5064             "pshufd  $tmp,$tmp2,0x02\n\t"
  4952             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5065             "vaddss  $dst,$dst,$tmp\n\t"
  4953             "pshufd  $tmp,$tmp3,0x03\n\t"
  5066             "pshufd  $tmp,$tmp2,0x03\n\t"
  4954             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5067             "vaddss  $dst,$dst,$tmp\n\t"
  4955             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
  5068             "vextractf32x4  $tmp2,$src2, 0x2\n\t"
  4956             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
  5069             "vaddss  $dst,$dst,$tmp2\n\t"
  4957             "pshufd  $tmp,$tmp3,0x01\n\t"
  5070             "pshufd  $tmp,$tmp2,0x01\n\t"
  4958             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5071             "vaddss  $dst,$dst,$tmp\n\t"
  4959             "pshufd  $tmp,$tmp3,0x02\n\t"
  5072             "pshufd  $tmp,$tmp2,0x02\n\t"
  4960             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5073             "vaddss  $dst,$dst,$tmp\n\t"
  4961             "pshufd  $tmp,$tmp3,0x03\n\t"
  5074             "pshufd  $tmp,$tmp2,0x03\n\t"
  4962             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5075             "vaddss  $dst,$dst,$tmp\n\t"
  4963             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
  5076             "vextractf32x4  $tmp2,$src2, 0x3\n\t"
  4964             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
  5077             "vaddss  $dst,$dst,$tmp2\n\t"
  4965             "pshufd  $tmp,$tmp3,0x01\n\t"
  5078             "pshufd  $tmp,$tmp2,0x01\n\t"
  4966             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5079             "vaddss  $dst,$dst,$tmp\n\t"
  4967             "pshufd  $tmp,$tmp3,0x02\n\t"
  5080             "pshufd  $tmp,$tmp2,0x02\n\t"
  4968             "vaddss  $tmp2,$tmp2,$tmp\n\t"
  5081             "vaddss  $dst,$dst,$tmp\n\t"
  4969             "pshufd  $tmp,$tmp3,0x03\n\t"
  5082             "pshufd  $tmp,$tmp2,0x03\n\t"
  4970             "vaddss  $dst,$tmp2,$tmp\t! add reduction16F" %}
  5083             "vaddss  $dst,$dst,$tmp\t! add reduction16F" %}
  4971   ins_encode %{
  5084   ins_encode %{
  4972     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  5085     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  4973     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5086     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  4974     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5087     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4975     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  5088     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  4976     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5089     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4977     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  5090     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  4978     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5091     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4979     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
  5092     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
  4980     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5093     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  4981     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
  5094     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
  4982     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5095     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4983     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
  5096     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
  4984     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5097     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4985     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
  5098     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
  4986     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5099     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4987     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
  5100     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
  4988     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5101     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  4989     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
  5102     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
  4990     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5103     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4991     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
  5104     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
  4992     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5105     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4993     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
  5106     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
  4994     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5107     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4995     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
  5108     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
  4996     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5109     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  4997     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
  5110     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
  4998     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5111     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  4999     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
  5112     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
  5000     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5113     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5001     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
  5114     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
  5002     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5115     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5003   %}
  5116   %}
  5004   ins_pipe( pipe_slow );
  5117   ins_pipe( pipe_slow );
  5005 %}
  5118 %}
  5006 
  5119 
  5007 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
  5120 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  5008   predicate(UseSSE >= 1 && UseAVX == 0);
  5121   predicate(UseSSE >= 1 && UseAVX == 0);
  5009   match(Set dst (AddReductionVD src1 src2));
  5122   match(Set dst (AddReductionVD dst src2));
  5010   effect(TEMP tmp, TEMP dst);
  5123   effect(TEMP tmp, TEMP dst);
  5011   format %{ "movdqu  $tmp,$src1\n\t"
  5124   format %{ "addsd   $dst,$src2\n\t"
  5012             "addsd   $tmp,$src2\n\t"
  5125             "pshufd  $tmp,$src2,0xE\n\t"
  5013             "pshufd  $dst,$src2,0xE\n\t"
       
  5014             "addsd   $dst,$tmp\t! add reduction2D" %}
  5126             "addsd   $dst,$tmp\t! add reduction2D" %}
  5015   ins_encode %{
  5127   ins_encode %{
  5016     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
  5128     __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
  5017     __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
  5129     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
  5018     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
       
  5019     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  5130     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  5020   %}
  5131   %}
  5021   ins_pipe( pipe_slow );
  5132   ins_pipe( pipe_slow );
  5022 %}
  5133 %}
  5023 
  5134 
  5024 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
  5135 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  5025   predicate(UseAVX > 0);
  5136   predicate(UseAVX > 0);
  5026   match(Set dst (AddReductionVD src1 src2));
  5137   match(Set dst (AddReductionVD dst src2));
  5027   effect(TEMP tmp, TEMP tmp2);
  5138   effect(TEMP tmp, TEMP dst);
  5028   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
  5139   format %{ "vaddsd  $dst,$dst,$src2\n\t"
  5029             "pshufd  $tmp,$src2,0xE\n\t"
  5140             "pshufd  $tmp,$src2,0xE\n\t"
  5030             "vaddsd  $dst,$tmp2,$tmp\t! add reduction2D" %}
  5141             "vaddsd  $dst,$dst,$tmp\t! add reduction2D" %}
  5031   ins_encode %{
  5142   ins_encode %{
  5032     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  5143     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  5033     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
  5144     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
  5034     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5145     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5035   %}
  5146   %}
  5036   ins_pipe( pipe_slow );
  5147   ins_pipe( pipe_slow );
  5037 %}
  5148 %}
  5038 
  5149 
  5039 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
  5150 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  5040   predicate(UseAVX > 0);
  5151   predicate(UseAVX > 0);
  5041   match(Set dst (AddReductionVD src1 src2));
  5152   match(Set dst (AddReductionVD dst src2));
  5042   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  5153   effect(TEMP tmp, TEMP dst, TEMP tmp2);
  5043   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
  5154   format %{ "vaddsd  $dst,$dst,$src2\n\t"
  5044             "pshufd  $tmp,$src2,0xE\n\t"
  5155             "pshufd  $tmp,$src2,0xE\n\t"
  5045             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
  5156             "vaddsd  $dst,$dst,$tmp\n\t"
  5046             "vextractf128  $tmp3,$src2\n\t"
  5157             "vextractf32x4h  $tmp2,$src2, 0x1\n\t"
  5047             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
  5158             "vaddsd  $dst,$dst,$tmp2\n\t"
  5048             "pshufd  $tmp,$tmp3,0xE\n\t"
  5159             "pshufd  $tmp,$tmp2,0xE\n\t"
  5049             "vaddsd  $dst,$tmp2,$tmp\t! add reduction4D" %}
  5160             "vaddsd  $dst,$dst,$tmp\t! add reduction4D" %}
  5050   ins_encode %{
  5161   ins_encode %{
  5051     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  5162     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  5052     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
  5163     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
  5053     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5164     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5054     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
  5165     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
  5055     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5166     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  5056     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
  5167     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  5057     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5168     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5058   %}
  5169   %}
  5059   ins_pipe( pipe_slow );
  5170   ins_pipe( pipe_slow );
  5060 %}
  5171 %}
  5061 
  5172 
  5062 instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
  5173 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  5063   predicate(UseAVX > 2);
  5174   predicate(UseAVX > 2);
  5064   match(Set dst (AddReductionVD src1 src2));
  5175   match(Set dst (AddReductionVD dst src2));
  5065   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  5176   effect(TEMP tmp, TEMP dst, TEMP tmp2);
  5066   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
  5177   format %{ "vaddsd  $dst,$dst,$src2\n\t"
  5067             "pshufd  $tmp,$src2,0xE\n\t"
  5178             "pshufd  $tmp,$src2,0xE\n\t"
  5068             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
  5179             "vaddsd  $dst,$dst,$tmp\n\t"
  5069             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
  5180             "vextractf32x4  $tmp2,$src2, 0x1\n\t"
  5070             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
  5181             "vaddsd  $dst,$dst,$tmp2\n\t"
  5071             "pshufd  $tmp,$tmp3,0xE\n\t"
  5182             "pshufd  $tmp,$tmp2,0xE\n\t"
  5072             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
  5183             "vaddsd  $dst,$dst,$tmp\n\t"
  5073             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
  5184             "vextractf32x4  $tmp2,$src2, 0x2\n\t"
  5074             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
  5185             "vaddsd  $dst,$dst,$tmp2\n\t"
  5075             "pshufd  $tmp,$tmp3,0xE\n\t"
  5186             "pshufd  $tmp,$tmp2,0xE\n\t"
  5076             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
  5187             "vaddsd  $dst,$dst,$tmp\n\t"
  5077             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
  5188             "vextractf32x4  $tmp2,$src2, 0x3\n\t"
  5078             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
  5189             "vaddsd  $dst,$dst,$tmp2\n\t"
  5079             "pshufd  $tmp,$tmp3,0xE\n\t"
  5190             "pshufd  $tmp,$tmp2,0xE\n\t"
  5080             "vaddsd  $dst,$tmp2,$tmp\t! add reduction8D" %}
  5191             "vaddsd  $dst,$dst,$tmp\t! add reduction8D" %}
  5081   ins_encode %{
  5192   ins_encode %{
  5082     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  5193     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  5083     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
  5194     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
  5084     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5195     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5085     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
  5196     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
  5086     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5197     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  5087     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
  5198     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  5088     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5199     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5089     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
  5200     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
  5090     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5201     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  5091     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
  5202     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  5092     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5203     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5093     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
  5204     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
  5094     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5205     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  5095     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
  5206     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  5096     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5207     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5097   %}
  5208   %}
  5098   ins_pipe( pipe_slow );
  5209   ins_pipe( pipe_slow );
  5099 %}
  5210 %}
  5100 
  5211 
  5101 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  5212 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  5214 
  5325 
  5215 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  5326 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  5216   predicate(UseAVX > 2);
  5327   predicate(UseAVX > 2);
  5217   match(Set dst (MulReductionVI src1 src2));
  5328   match(Set dst (MulReductionVI src1 src2));
  5218   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  5329   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  5219   format %{ "vextracti64x4  $tmp3,$src2\n\t"
  5330   format %{ "vextracti64x4  $tmp3,$src2,0x1\n\t"
  5220             "vpmulld  $tmp3,$tmp3,$src2\n\t"
  5331             "vpmulld  $tmp3,$tmp3,$src2\n\t"
  5221             "vextracti128   $tmp,$tmp3\n\t"
  5332             "vextracti128   $tmp,$tmp3\n\t"
  5222             "vpmulld  $tmp,$tmp,$src2\n\t"
  5333             "vpmulld  $tmp,$tmp,$src2\n\t"
  5223             "pshufd   $tmp2,$tmp,0xE\n\t"
  5334             "pshufd   $tmp2,$tmp,0xE\n\t"
  5224             "vpmulld  $tmp,$tmp,$tmp2\n\t"
  5335             "vpmulld  $tmp,$tmp,$tmp2\n\t"
  5226             "vpmulld  $tmp,$tmp,$tmp2\n\t"
  5337             "vpmulld  $tmp,$tmp,$tmp2\n\t"
  5227             "movd     $tmp2,$src1\n\t"
  5338             "movd     $tmp2,$src1\n\t"
  5228             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
  5339             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
  5229             "movd     $dst,$tmp2\t! mul reduction16I" %}
  5340             "movd     $dst,$tmp2\t! mul reduction16I" %}
  5230   ins_encode %{
  5341   ins_encode %{
  5231     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
  5342     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
  5232     __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
  5343     __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
  5233     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
  5344     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
  5234     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
  5345     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
  5235     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
  5346     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
  5236     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
  5347     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
  5265 
  5376 
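
The rvmul16I rule above folds a 512-bit vector of ints down to a scalar: extract the upper 256 bits (vextracti64x4), multiply with the lower half (vpmulld), extract the upper 128 bits of that result, keep halving with pshufd/vpmulld, and finally multiply in the scalar seed from src1. A minimal Java sketch of the kind of loop C2's SuperWord pass can turn into a MulReductionVI follows; the class name and sizes are illustrative, and whether the loop actually vectorizes depends on the CPU, -XX:UseAVX, -XX:MaxVectorSize, and JIT heuristics.

    public class IntProduct {
        // Int multiply reduction: a MulReductionVI candidate once the loop
        // is vectorized. Overflow wraps, as Java int semantics require.
        static int product(int[] a) {
            int p = 1;                      // the scalar seed (src1 above)
            for (int i = 0; i < a.length; i++) {
                p *= a[i];
            }
            return p;
        }
        public static void main(String[] args) {
            int[] a = new int[1024];
            java.util.Arrays.fill(a, 3);
            System.out.println(product(a));
        }
    }
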
  5266 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  5377 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  5267   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  5378   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  5268   match(Set dst (MulReductionVL src1 src2));
  5379   match(Set dst (MulReductionVL src1 src2));
  5269   effect(TEMP tmp, TEMP tmp2);
  5380   effect(TEMP tmp, TEMP tmp2);
  5270   format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
  5381   format %{ "vextracti128  $tmp,$src2\n\t"
  5271             "vpmullq  $tmp2,$tmp,$src2\n\t"
  5382             "vpmullq  $tmp2,$tmp,$src2\n\t"
  5272             "pshufd   $tmp,$tmp2,0xE\n\t"
  5383             "pshufd   $tmp,$tmp2,0xE\n\t"
  5273             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
  5384             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
  5274             "movdq    $tmp,$src1\n\t"
  5385             "movdq    $tmp,$src1\n\t"
  5275             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
  5386             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
  5276             "movdq    $dst,$tmp2\t! mul reduction4L" %}
  5387             "movdq    $dst,$tmp2\t! mul reduction4L" %}
  5277   ins_encode %{
  5388   ins_encode %{
  5278     __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
  5389     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
  5279     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
  5390     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
  5280     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  5391     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  5281     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  5392     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  5282     __ movdq($tmp$$XMMRegister, $src1$$Register);
  5393     __ movdq($tmp$$XMMRegister, $src1$$Register);
  5283     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  5394     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  5288 
  5399 
  5289 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  5400 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  5290   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  5401   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  5291   match(Set dst (MulReductionVL src1 src2));
  5402   match(Set dst (MulReductionVL src1 src2));
  5292   effect(TEMP tmp, TEMP tmp2);
  5403   effect(TEMP tmp, TEMP tmp2);
  5293   format %{ "vextracti64x4  $tmp2,$src2\n\t"
  5404   format %{ "vextracti64x4  $tmp2,$src2,0x1\n\t"
  5294             "vpmullq  $tmp2,$tmp2,$src2\n\t"
  5405             "vpmullq  $tmp2,$tmp2,$src2\n\t"
  5295             "vextracti128   $tmp,$tmp2\n\t"
  5406             "vextracti128   $tmp,$tmp2\n\t"
  5296             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
  5407             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
  5297             "pshufd   $tmp,$tmp2,0xE\n\t"
  5408             "pshufd   $tmp,$tmp2,0xE\n\t"
  5298             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
  5409             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
  5299             "movdq    $tmp,$src1\n\t"
  5410             "movdq    $tmp,$src1\n\t"
  5300             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
  5411             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
  5301             "movdq    $dst,$tmp2\t! mul reduction8L" %}
  5412             "movdq    $dst,$tmp2\t! mul reduction8L" %}
  5302   ins_encode %{
  5413   ins_encode %{
  5303     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
  5414     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
  5304     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
  5415     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
  5305     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
  5416     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
  5306     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  5417     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  5307     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  5418     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  5308     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  5419     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
  5312   %}
  5423   %}
  5313   ins_pipe( pipe_slow );
  5424   ins_pipe( pipe_slow );
  5314 %}
  5425 %}
  5315 #endif
  5426 #endif
  5316 
  5427 
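
The long variants above (rvmul4L and rvmul8L) are additionally predicated on VM_Version::supports_avx512dq(), because vpmullq, the packed 64-bit multiply they fold with, is an AVX-512DQ instruction; without DQ there is no vector long multiply to reduce with. A hedged Java sketch of a loop these rules target (names are illustrative; vectorization is not guaranteed):

    public class LongProduct {
        // Long multiply reduction: a MulReductionVL candidate on AVX-512DQ
        // hardware. The scalar seed is combined last via movdq + vpmullq.
        static long product(long[] a) {
            long p = 1L;
            for (int i = 0; i < a.length; i++) {
                p *= a[i];
            }
            return p;
        }
        public static void main(String[] args) {
            long[] a = new long[512];
            java.util.Arrays.fill(a, 5L);
            System.out.println(product(a));
        }
    }
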
  5317 instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  5428 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
  5318   predicate(UseSSE >= 1 && UseAVX == 0);
  5429   predicate(UseSSE >= 1 && UseAVX == 0);
  5319   match(Set dst (MulReductionVF src1 src2));
  5430   match(Set dst (MulReductionVF dst src2));
  5320   effect(TEMP tmp, TEMP tmp2);
  5431   effect(TEMP dst, TEMP tmp);
  5321   format %{ "movdqu  $tmp,$src1\n\t"
  5432   format %{ "mulss   $dst,$src2\n\t"
  5322             "mulss   $tmp,$src2\n\t"
  5433             "pshufd  $tmp,$src2,0x01\n\t"
  5323             "pshufd  $tmp2,$src2,0x01\n\t"
  5434             "mulss   $dst,$tmp\t! mul reduction2F" %}
  5324             "mulss   $tmp,$tmp2\n\t"
  5435   ins_encode %{
  5325             "movdqu  $dst,$tmp\t! mul reduction2F" %}
  5436     __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
  5326   ins_encode %{
  5437     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5327     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
  5438     __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  5328     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
  5439   %}
  5329     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
  5440   ins_pipe( pipe_slow );
  5330     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
  5441 %}
  5331     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  5442 
  5332   %}
  5443 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  5333   ins_pipe( pipe_slow );
       
  5334 %}
       
  5335 
       
  5336 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
       
  5337   predicate(UseAVX > 0);
  5444   predicate(UseAVX > 0);
  5338   match(Set dst (MulReductionVF src1 src2));
  5445   match(Set dst (MulReductionVF dst src2));
  5339   effect(TEMP tmp, TEMP tmp2);
  5446   effect(TEMP tmp, TEMP dst);
  5340   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
  5447   format %{ "vmulss  $dst,$dst,$src2\n\t"
  5341             "pshufd  $tmp,$src2,0x01\n\t"
  5448             "pshufd  $tmp,$src2,0x01\n\t"
  5342             "vmulss  $dst,$tmp2,$tmp\t! mul reduction2F" %}
  5449             "vmulss  $dst,$dst,$tmp\t! mul reduction2F" %}
  5343   ins_encode %{
  5450   ins_encode %{
  5344     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  5451     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  5345     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5452     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5346     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5453     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5347   %}
  5454   %}
  5348   ins_pipe( pipe_slow );
  5455   ins_pipe( pipe_slow );
  5349 %}
  5456 %}
  5350 
  5457 
  5351 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  5458 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  5352   predicate(UseSSE >= 1 && UseAVX == 0);
  5459   predicate(UseSSE >= 1 && UseAVX == 0);
  5353   match(Set dst (MulReductionVF src1 src2));
  5460   match(Set dst (MulReductionVF dst src2));
  5354   effect(TEMP tmp, TEMP tmp2);
  5461   effect(TEMP dst, TEMP tmp);
  5355   format %{ "movdqu  $tmp,$src1\n\t"
  5462   format %{ "mulss   $dst,$src2\n\t"
  5356             "mulss   $tmp,$src2\n\t"
  5463             "pshufd  $tmp,$src2,0x01\n\t"
  5357             "pshufd  $tmp2,$src2,0x01\n\t"
  5464             "mulss   $dst,$tmp\n\t"
  5358             "mulss   $tmp,$tmp2\n\t"
  5465             "pshufd  $tmp,$src2,0x02\n\t"
  5359             "pshufd  $tmp2,$src2,0x02\n\t"
  5466             "mulss   $dst,$tmp\n\t"
  5360             "mulss   $tmp,$tmp2\n\t"
  5467             "pshufd  $tmp,$src2,0x03\n\t"
  5361             "pshufd  $tmp2,$src2,0x03\n\t"
  5468             "mulss   $dst,$tmp\t! mul reduction4F" %}
  5362             "mulss   $tmp,$tmp2\n\t"
  5469   ins_encode %{
  5363             "movdqu  $dst,$tmp\t! mul reduction4F" %}
  5470     __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
  5364   ins_encode %{
  5471     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5365     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
  5472     __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  5366     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
  5473     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  5367     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
  5474     __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  5368     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
  5475     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  5369     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
  5476     __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  5370     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
  5477   %}
  5371     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
  5478   ins_pipe( pipe_slow );
  5372     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
  5479 %}
  5373     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  5480 
  5374   %}
  5481 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  5375   ins_pipe( pipe_slow );
       
  5376 %}
       
  5377 
       
  5378 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
       
  5379   predicate(UseAVX > 0);
  5482   predicate(UseAVX > 0);
  5380   match(Set dst (MulReductionVF src1 src2));
  5483   match(Set dst (MulReductionVF dst src2));
  5381   effect(TEMP tmp, TEMP tmp2);
  5484   effect(TEMP tmp, TEMP dst);
  5382   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
  5485   format %{ "vmulss  $dst,$dst,$src2\n\t"
  5383             "pshufd  $tmp,$src2,0x01\n\t"
  5486             "pshufd  $tmp,$src2,0x01\n\t"
  5384             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5487             "vmulss  $dst,$dst,$tmp\n\t"
  5385             "pshufd  $tmp,$src2,0x02\n\t"
  5488             "pshufd  $tmp,$src2,0x02\n\t"
  5386             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5489             "vmulss  $dst,$dst,$tmp\n\t"
  5387             "pshufd  $tmp,$src2,0x03\n\t"
  5490             "pshufd  $tmp,$src2,0x03\n\t"
  5388             "vmulss  $dst,$tmp2,$tmp\t! mul reduction4F" %}
  5491             "vmulss  $dst,$dst,$tmp\t! mul reduction4F" %}
  5389   ins_encode %{
  5492   ins_encode %{
  5390     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  5493     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  5391     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5494     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5392     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5495     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5393     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  5496     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  5394     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5497     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5395     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  5498     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  5396     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5499     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5397   %}
  5500   %}
  5398   ins_pipe( pipe_slow );
  5501   ins_pipe( pipe_slow );
  5399 %}
  5502 %}
  5400 
  5503 
  5401 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
  5504 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  5402   predicate(UseAVX > 0);
  5505   predicate(UseAVX > 0);
  5403   match(Set dst (MulReductionVF src1 src2));
  5506   match(Set dst (MulReductionVF dst src2));
  5404   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  5507   effect(TEMP tmp, TEMP dst, TEMP tmp2);
  5405   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
  5508   format %{ "vmulss  $dst,$dst,$src2\n\t"
  5406             "pshufd  $tmp,$src2,0x01\n\t"
  5509             "pshufd  $tmp,$src2,0x01\n\t"
  5407             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5510             "vmulss  $dst,$dst,$tmp\n\t"
  5408             "pshufd  $tmp,$src2,0x02\n\t"
  5511             "pshufd  $tmp,$src2,0x02\n\t"
  5409             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5512             "vmulss  $dst,$dst,$tmp\n\t"
  5410             "pshufd  $tmp,$src2,0x03\n\t"
  5513             "pshufd  $tmp,$src2,0x03\n\t"
  5411             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5514             "vmulss  $dst,$dst,$tmp\n\t"
  5412             "vextractf128  $tmp3,$src2\n\t"
  5515             "vextractf128  $tmp2,$src2\n\t"
  5413             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
  5516             "vmulss  $dst,$dst,$tmp2\n\t"
  5414             "pshufd  $tmp,$tmp3,0x01\n\t"
  5517             "pshufd  $tmp,$tmp2,0x01\n\t"
  5415             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5518             "vmulss  $dst,$dst,$tmp\n\t"
  5416             "pshufd  $tmp,$tmp3,0x02\n\t"
  5519             "pshufd  $tmp,$tmp2,0x02\n\t"
  5417             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5520             "vmulss  $dst,$dst,$tmp\n\t"
  5418             "pshufd  $tmp,$tmp3,0x03\n\t"
  5521             "pshufd  $tmp,$tmp2,0x03\n\t"
  5419             "vmulss  $dst,$tmp2,$tmp\t! mul reduction8F" %}
  5522             "vmulss  $dst,$dst,$tmp\t! mul reduction8F" %}
  5420   ins_encode %{
  5523   ins_encode %{
  5421     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  5524     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  5422     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5525     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5423     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5526     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5424     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  5527     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  5425     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5528     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5426     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  5529     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  5427     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5530     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5428     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
  5531     __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
  5429     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5532     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  5430     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
  5533     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
  5431     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5534     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5432     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
  5535     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
  5433     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5536     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5434     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
  5537     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
  5435     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5538     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5436   %}
  5539   %}
  5437   ins_pipe( pipe_slow );
  5540   ins_pipe( pipe_slow );
  5438 %}
  5541 %}
  5439 
  5542 
  5440 instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  5543 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  5441   predicate(UseAVX > 2);
  5544   predicate(UseAVX > 2);
  5442   match(Set dst (MulReductionVF src1 src2));
  5545   match(Set dst (MulReductionVF dst src2));
  5443   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  5546   effect(TEMP tmp, TEMP dst, TEMP tmp2);
  5444   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
  5547   format %{ "vmulss  $dst,$dst,$src2\n\t"
  5445             "pshufd  $tmp,$src2,0x01\n\t"
  5548             "pshufd  $tmp,$src2,0x01\n\t"
  5446             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5549             "vmulss  $dst,$dst,$tmp\n\t"
  5447             "pshufd  $tmp,$src2,0x02\n\t"
  5550             "pshufd  $tmp,$src2,0x02\n\t"
  5448             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5551             "vmulss  $dst,$dst,$tmp\n\t"
  5449             "pshufd  $tmp,$src2,0x03\n\t"
  5552             "pshufd  $tmp,$src2,0x03\n\t"
  5450             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5553             "vmulss  $dst,$dst,$tmp\n\t"
  5451             "vextractf32x4  $tmp3,$src2, 0x1\n\t"
  5554             "vextractf32x4  $tmp2,$src2, 0x1\n\t"
  5452             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
  5555             "vmulss  $dst,$dst,$tmp2\n\t"
  5453             "pshufd  $tmp,$tmp3,0x01\n\t"
  5556             "pshufd  $tmp,$tmp2,0x01\n\t"
  5454             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5557             "vmulss  $dst,$dst,$tmp\n\t"
  5455             "pshufd  $tmp,$tmp3,0x02\n\t"
  5558             "pshufd  $tmp,$tmp2,0x02\n\t"
  5456             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5559             "vmulss  $dst,$dst,$tmp\n\t"
  5457             "pshufd  $tmp,$tmp3,0x03\n\t"
  5560             "pshufd  $tmp,$tmp2,0x03\n\t"
  5458             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5561             "vmulss  $dst,$dst,$tmp\n\t"
  5459             "vextractf32x4  $tmp3,$src2, 0x2\n\t"
  5562             "vextractf32x4  $tmp2,$src2, 0x2\n\t"
  5460             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
  5563             "vmulss  $dst,$dst,$tmp2\n\t"
  5461             "pshufd  $tmp,$tmp3,0x01\n\t"
  5564             "pshufd  $tmp,$tmp2,0x01\n\t"
  5462             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5565             "vmulss  $dst,$dst,$tmp\n\t"
  5463             "pshufd  $tmp,$tmp3,0x02\n\t"
  5566             "pshufd  $tmp,$tmp2,0x02\n\t"
  5464             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5567             "vmulss  $dst,$dst,$tmp\n\t"
  5465             "pshufd  $tmp,$tmp3,0x03\n\t"
  5568             "pshufd  $tmp,$tmp2,0x03\n\t"
  5466             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5569             "vmulss  $dst,$dst,$tmp\n\t"
  5467             "vextractf32x4  $tmp3,$src2, 0x3\n\t"
  5570             "vextractf32x4  $tmp2,$src2, 0x3\n\t"
  5468             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
  5571             "vmulss  $dst,$dst,$tmp2\n\t"
  5469             "pshufd  $tmp,$tmp3,0x01\n\t"
  5572             "pshufd  $tmp,$tmp2,0x01\n\t"
  5470             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5573             "vmulss  $dst,$dst,$tmp\n\t"
  5471             "pshufd  $tmp,$tmp3,0x02\n\t"
  5574             "pshufd  $tmp,$tmp2,0x02\n\t"
  5472             "vmulss  $tmp2,$tmp2,$tmp\n\t"
  5575             "vmulss  $dst,$dst,$tmp\n\t"
  5473             "pshufd  $tmp,$tmp3,0x03\n\t"
  5576             "pshufd  $tmp,$tmp2,0x03\n\t"
  5474             "vmulss  $dst,$tmp2,$tmp\t! mul reduction16F" %}
  5577             "vmulss  $dst,$dst,$tmp\t! mul reduction16F" %}
  5475   ins_encode %{
  5578   ins_encode %{
  5476     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  5579     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  5477     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5580     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
  5478     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5581     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5479     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  5582     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
  5480     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5583     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5481     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  5584     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
  5482     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5585     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5483     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
  5586     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
  5484     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5587     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  5485     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
  5588     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
  5486     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5589     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5487     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
  5590     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
  5488     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5591     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5489     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
  5592     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
  5490     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5593     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5491     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
  5594     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
  5492     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5595     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  5493     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
  5596     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
  5494     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5597     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5495     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
  5598     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
  5496     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5599     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5497     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
  5600     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
  5498     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5601     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5499     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
  5602     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
  5500     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5603     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  5501     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
  5604     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
  5502     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5605     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5503     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
  5606     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
  5504     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5607     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5505     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
  5608     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
  5506     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5609     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5507   %}
  5610   %}
  5508   ins_pipe( pipe_slow );
  5611   ins_pipe( pipe_slow );
  5509 %}
  5612 %}
  5510 
  5613 
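
Across the float rules above, the rewrite changes the match from (MulReductionVF src1 src2) to (MulReductionVF dst src2): the running product now lives in dst itself, which drops the separate scalar input and one XMM temporary per rule (tmp3 in the wide forms) and removes the initial movdqu/vmulss copy. Note that the lanes are still combined by a serial mulss/vmulss chain in lane order 0, 1, 2, ..., preserving the left-to-right evaluation order that Java's non-associative float arithmetic requires. A minimal Java sketch of the source shape (illustrative only):

    public class FloatProduct {
        // Float multiply reduction: a MulReductionVF candidate. The chain of
        // vmulss instructions keeps the strict source evaluation order.
        static float product(float[] a) {
            float p = 1.0f;                 // accumulates in dst, lane by lane
            for (int i = 0; i < a.length; i++) {
                p *= a[i];
            }
            return p;
        }
        public static void main(String[] args) {
            float[] a = new float[64];
            java.util.Arrays.fill(a, 1.5f);
            System.out.println(product(a));
        }
    }
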
  5511 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
  5614 instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  5512   predicate(UseSSE >= 1 && UseAVX == 0);
  5615   predicate(UseSSE >= 1 && UseAVX == 0);
  5513   match(Set dst (MulReductionVD src1 src2));
  5616   match(Set dst (MulReductionVD dst src2));
       
  5617   effect(TEMP dst, TEMP tmp);
       
  5618   format %{ "mulsd   $dst,$src2\n\t"
       
  5619             "pshufd  $tmp,$src2,0xE\n\t"
       
  5620             "mulsd   $dst,$tmp\t! mul reduction2D" %}
       
  5621   ins_encode %{
       
  5622     __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
       
  5623     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
       
  5624     __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
       
  5625   %}
       
  5626   ins_pipe( pipe_slow );
       
  5627 %}
       
  5628 
       
  5629 instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
       
  5630   predicate(UseAVX > 0);
       
  5631   match(Set dst (MulReductionVD dst src2));
  5514   effect(TEMP tmp, TEMP dst);
  5632   effect(TEMP tmp, TEMP dst);
  5515   format %{ "movdqu  $tmp,$src1\n\t"
  5633   format %{ "vmulsd  $dst,$dst,$src2\n\t"
  5516             "mulsd   $tmp,$src2\n\t"
  5634             "pshufd  $tmp,$src2,0xE\n\t"
  5517             "pshufd  $dst,$src2,0xE\n\t"
  5635             "vmulsd  $dst,$dst,$tmp\t! mul reduction2D" %}
  5518             "mulsd   $dst,$tmp\t! mul reduction2D" %}
  5636   ins_encode %{
  5519   ins_encode %{
  5637     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  5520     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
  5638     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
  5521     __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
  5639     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5522     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
  5640   %}
  5523     __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  5641   ins_pipe( pipe_slow );
  5524   %}
  5642 %}
  5525   ins_pipe( pipe_slow );
  5643 
  5526 %}
  5644 instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  5527 
       
  5528 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
       
  5529   predicate(UseAVX > 0);
  5645   predicate(UseAVX > 0);
  5530   match(Set dst (MulReductionVD src1 src2));
  5646   match(Set dst (MulReductionVD dst src2));
  5531   effect(TEMP tmp, TEMP tmp2);
  5647   effect(TEMP tmp, TEMP dst, TEMP tmp2);
  5532   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
  5648   format %{ "vmulsd  $dst,$dst,$src2\n\t"
  5533             "pshufd  $tmp,$src2,0xE\n\t"
  5649             "pshufd  $tmp,$src2,0xE\n\t"
  5534             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction2D" %}
  5650             "vmulsd  $dst,$dst,$tmp\n\t"
  5535   ins_encode %{
  5651             "vextractf128  $tmp2,$src2\n\t"
  5536     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  5652             "vmulsd  $dst,$dst,$tmp2\n\t"
       
  5653             "pshufd  $tmp,$tmp2,0xE\n\t"
       
  5654             "vmulsd  $dst,$dst,$tmp\t! mul reduction4D" %}
       
  5655   ins_encode %{
       
  5656     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  5537     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
  5657     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
  5538     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5658     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5539   %}
  5659     __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
  5540   ins_pipe( pipe_slow );
  5660     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  5541 %}
  5661     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  5542 
  5662     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5543 instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
  5663   %}
  5544   predicate(UseAVX > 0);
  5664   ins_pipe( pipe_slow );
  5545   match(Set dst (MulReductionVD src1 src2));
  5665 %}
  5546   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  5666 
  5547   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
  5667 instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
       
  5668   predicate(UseAVX > 2);
       
  5669   match(Set dst (MulReductionVD dst src2));
       
  5670   effect(TEMP tmp, TEMP dst, TEMP tmp2);
       
  5671   format %{ "vmulsd  $dst,$dst,$src2\n\t"
  5548             "pshufd  $tmp,$src2,0xE\n\t"
  5672             "pshufd  $tmp,$src2,0xE\n\t"
  5549             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
  5673             "vmulsd  $dst,$dst,$tmp\n\t"
  5550             "vextractf128  $tmp3,$src2\n\t"
  5674             "vextractf32x4  $tmp2,$src2, 0x1\n\t"
  5551             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
  5675             "vmulsd  $dst,$dst,$tmp2\n\t"
  5552             "pshufd  $tmp,$tmp3,0xE\n\t"
  5676             "pshufd  $tmp,$src2,0xE\n\t"
  5553             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction4D" %}
  5677             "vmulsd  $dst,$dst,$tmp\n\t"
  5554   ins_encode %{
  5678             "vextractf32x4  $tmp2,$src2, 0x2\n\t"
  5555     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  5679             "vmulsd  $dst,$dst,$tmp2\n\t"
       
  5680             "pshufd  $tmp,$tmp2,0xE\n\t"
       
  5681             "vmulsd  $dst,$dst,$tmp\n\t"
       
  5682             "vextractf32x4  $tmp2,$src2, 0x3\n\t"
       
  5683             "vmulsd  $dst,$dst,$tmp2\n\t"
       
  5684             "pshufd  $tmp,$tmp2,0xE\n\t"
       
  5685             "vmulsd  $dst,$dst,$tmp\t! mul reduction8D" %}
       
  5686   ins_encode %{
       
  5687     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
  5556     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
  5688     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
  5557     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5689     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5558     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
  5690     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
  5559     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
  5691     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  5560     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
  5692     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  5561     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  5693     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5562   %}
  5694     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
  5563   ins_pipe( pipe_slow );
  5695     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  5564 %}
  5696     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  5565 
  5697     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5566 instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
  5698     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
  5567   predicate(UseAVX > 2);
  5699     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
  5568   match(Set dst (MulReductionVD src1 src2));
  5700     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
  5569   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  5701     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  5570   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
       
  5571             "pshufd  $tmp,$src2,0xE\n\t"
       
  5572             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
       
  5573             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
       
  5574             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
       
  5575             "pshufd  $tmp,$src2,0xE\n\t"
       
  5576             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
       
  5577             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
       
  5578             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
       
  5579             "pshufd  $tmp,$tmp3,0xE\n\t"
       
  5580             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
       
  5581             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
       
  5582             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
       
  5583             "pshufd  $tmp,$tmp3,0xE\n\t"
       
  5584             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction8D" %}
       
  5585   ins_encode %{
       
  5586     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
       
  5587     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
       
  5588     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  5589     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
       
  5590     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
       
  5591     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
       
  5592     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  5593     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
       
  5594     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
       
  5595     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
       
  5596     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  5597     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
       
  5598     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
       
  5599     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
       
  5600     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  5601   %}
  5702   %}
  5602   ins_pipe( pipe_slow );
  5703   ins_pipe( pipe_slow );
  5603 %}
  5704 %}
  5604 
  5705 
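
The double rules follow the same accumulate-in-dst pattern; rvmul8D walks the four 128-bit quarters of the source ZMM register with vextractf32x4 and multiplies the two lanes of each quarter into dst in order, so the emitted chain appears equivalent to this scalar model (a sketch; the class and method names are mine):

    public class MulReduce8D {
        // Scalar model of rvmul8D_reduction_reg: acc plays the role of dst
        // on entry, v[0..7] are the lanes of src2, multiplied strictly in
        // lane order by the vmulsd chain.
        static double mulReduce8D(double acc, double[] v) {
            for (int i = 0; i < 8; i++) {
                acc *= v[i];
            }
            return acc;
        }
        public static void main(String[] args) {
            double[] v = {1, 2, 3, 4, 5, 6, 7, 8};
            System.out.println(mulReduce8D(1.0, v));   // prints 40320.0
        }
    }
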
  5605 // ====================VECTOR ARITHMETIC=======================================
  5706 // ====================VECTOR ARITHMETIC=======================================
  5606 
  5707 
  5607 // --------------------------------- ADD --------------------------------------
  5708 // --------------------------------- ADD --------------------------------------
  5608 
  5709 
  5609 // Bytes vector add
  5710 // Bytes vector add
  5610 instruct vadd4B(vecS dst, vecS src) %{
  5711 instruct vadd4B(vecS dst, vecS src) %{
  5611   predicate(n->as_Vector()->length() == 4);
  5712   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  5612   match(Set dst (AddVB dst src));
  5713   match(Set dst (AddVB dst src));
  5613   format %{ "paddb   $dst,$src\t! add packed4B" %}
  5714   format %{ "paddb   $dst,$src\t! add packed4B" %}
  5614   ins_encode %{
  5715   ins_encode %{
  5615     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  5716     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  5616   %}
  5717   %}
  5617   ins_pipe( pipe_slow );
  5718   ins_pipe( pipe_slow );
  5618 %}
  5719 %}
  5619 
  5720 
  5620 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  5721 instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  5621   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  5722   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  5622   match(Set dst (AddVB src1 src2));
  5723   match(Set dst (AddVB src1 src2));
  5623   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  5724   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  5624   ins_encode %{
  5725   ins_encode %{
  5625     int vector_len = 0;
  5726     int vector_len = 0;
  5626     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5727     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5627   %}
  5728   %}
  5628   ins_pipe( pipe_slow );
  5729   ins_pipe( pipe_slow );
  5629 %}
  5730 %}
  5630 
  5731 
  5631 instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
  5732 instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  5632   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  5733   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
  5734   match(Set dst (AddVB src1 src2));
       
  5735   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
       
  5736   ins_encode %{
       
  5737     int vector_len = 0;
       
  5738     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  5739   %}
       
  5740   ins_pipe( pipe_slow );
       
  5741 %}
       
  5742 
       
  5743 instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
       
  5744   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  5745   match(Set dst (AddVB dst src2));
       
  5746   effect(TEMP src1);
       
  5747   format %{ "vpaddb  $dst,$dst,$src2\t! add packed4B" %}
       
  5748   ins_encode %{
       
  5749     int vector_len = 0;
       
  5750     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  5751   %}
       
  5752   ins_pipe( pipe_slow );
       
  5753 %}
       
  5754 
       
  5755 instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
       
  5756   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  5633   match(Set dst (AddVB src (LoadVector mem)));
  5757   match(Set dst (AddVB src (LoadVector mem)));
  5634   format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  5758   format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  5635   ins_encode %{
  5759   ins_encode %{
  5636     int vector_len = 0;
  5760     int vector_len = 0;
  5637     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5761     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5638   %}
  5762   %}
  5639   ins_pipe( pipe_slow );
  5763   ins_pipe( pipe_slow );
  5640 %}
  5764 %}
  5641 
  5765 
       
  5766 instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
       
  5767   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
  5768   match(Set dst (AddVB src (LoadVector mem)));
       
  5769   format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
       
  5770   ins_encode %{
       
  5771     int vector_len = 0;
       
  5772     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  5773   %}
       
  5774   ins_pipe( pipe_slow );
       
  5775 %}
       
  5776 
       
  5777 instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
       
  5778   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  5779   match(Set dst (AddVB dst (LoadVector mem)));
       
  5780   effect(TEMP src);
       
  5781   format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
       
  5782   ins_encode %{
       
  5783     int vector_len = 0;
       
  5784     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  5785   %}
       
  5786   ins_pipe( pipe_slow );
       
  5787 %}
       
  5788 
  5642 instruct vadd8B(vecD dst, vecD src) %{
  5789 instruct vadd8B(vecD dst, vecD src) %{
  5643   predicate(n->as_Vector()->length() == 8);
  5790   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  5644   match(Set dst (AddVB dst src));
  5791   match(Set dst (AddVB dst src));
  5645   format %{ "paddb   $dst,$src\t! add packed8B" %}
  5792   format %{ "paddb   $dst,$src\t! add packed8B" %}
  5646   ins_encode %{
  5793   ins_encode %{
  5647     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  5794     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  5648   %}
  5795   %}
  5649   ins_pipe( pipe_slow );
  5796   ins_pipe( pipe_slow );
  5650 %}
  5797 %}
  5651 
  5798 
  5652 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  5799 instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  5653   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  5800   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  5654   match(Set dst (AddVB src1 src2));
  5801   match(Set dst (AddVB src1 src2));
  5655   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  5802   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  5656   ins_encode %{
  5803   ins_encode %{
  5657     int vector_len = 0;
  5804     int vector_len = 0;
  5658     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5805     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5659   %}
  5806   %}
  5660   ins_pipe( pipe_slow );
  5807   ins_pipe( pipe_slow );
  5661 %}
  5808 %}
  5662 
  5809 
  5663 instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
  5810 instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  5664   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  5811   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
  5812   match(Set dst (AddVB src1 src2));
       
  5813   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
       
  5814   ins_encode %{
       
  5815     int vector_len = 0;
       
  5816     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  5817   %}
       
  5818   ins_pipe( pipe_slow );
       
  5819 %}
       
  5820 
       
  5821 instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
       
  5822   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  5823   match(Set dst (AddVB dst src2));
       
  5824   effect(TEMP src1);
       
  5825   format %{ "vpaddb  $dst,$dst,$src2\t! add packed8B" %}
       
  5826   ins_encode %{
       
  5827     int vector_len = 0;
       
  5828     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  5829   %}
       
  5830   ins_pipe( pipe_slow );
       
  5831 %}
       
  5832 
       
  5833 instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
       
  5834   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  5665   match(Set dst (AddVB src (LoadVector mem)));
  5835   match(Set dst (AddVB src (LoadVector mem)));
  5666   format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  5836   format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  5667   ins_encode %{
  5837   ins_encode %{
  5668     int vector_len = 0;
  5838     int vector_len = 0;
  5669     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5839     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5670   %}
  5840   %}
  5671   ins_pipe( pipe_slow );
  5841   ins_pipe( pipe_slow );
  5672 %}
  5842 %}
  5673 
  5843 
       
  5844 instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
       
  5845   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
  5846   match(Set dst (AddVB src (LoadVector mem)));
       
  5847   format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
       
  5848   ins_encode %{
       
  5849     int vector_len = 0;
       
  5850     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  5851   %}
       
  5852   ins_pipe( pipe_slow );
       
  5853 %}
       
  5854 
       
  5855 instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
       
  5856   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  5857   match(Set dst (AddVB dst (LoadVector mem)));
       
  5858   effect(TEMP src);
       
  5859   format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
       
  5860   ins_encode %{
       
  5861     int vector_len = 0;
       
  5862     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  5863   %}
       
  5864   ins_pipe( pipe_slow );
       
  5865 %}
       
  5866 
  5674 instruct vadd16B(vecX dst, vecX src) %{
  5867 instruct vadd16B(vecX dst, vecX src) %{
  5675   predicate(n->as_Vector()->length() == 16);
  5868   predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  5676   match(Set dst (AddVB dst src));
  5869   match(Set dst (AddVB dst src));
  5677   format %{ "paddb   $dst,$src\t! add packed16B" %}
  5870   format %{ "paddb   $dst,$src\t! add packed16B" %}
  5678   ins_encode %{
  5871   ins_encode %{
  5679     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  5872     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  5680   %}
  5873   %}
  5681   ins_pipe( pipe_slow );
  5874   ins_pipe( pipe_slow );
  5682 %}
  5875 %}
  5683 
  5876 
  5684 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  5877 instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  5685   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  5878   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  5686   match(Set dst (AddVB src1 src2));
  5879   match(Set dst (AddVB src1 src2));
  5687   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  5880   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  5688   ins_encode %{
  5881   ins_encode %{
  5689     int vector_len = 0;
  5882     int vector_len = 0;
  5690     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5883     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5691   %}
  5884   %}
  5692   ins_pipe( pipe_slow );
  5885   ins_pipe( pipe_slow );
  5693 %}
  5886 %}
  5694 
  5887 
  5695 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  5888 instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  5696   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  5889   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
  5890   match(Set dst (AddVB src1 src2));
       
  5891   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
       
  5892   ins_encode %{
       
  5893     int vector_len = 0;
       
  5894     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  5895   %}
       
  5896   ins_pipe( pipe_slow );
       
  5897 %}
       
  5898 
       
  5899 instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
       
  5900   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  5901   match(Set dst (AddVB dst src2));
       
  5902   effect(TEMP src1);
       
  5903   format %{ "vpaddb  $dst,$dst,$src2\t! add packed16B" %}
       
  5904   ins_encode %{
       
  5905     int vector_len = 0;
       
  5906     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  5907   %}
       
  5908   ins_pipe( pipe_slow );
       
  5909 %}
       
  5910 
       
  5911 instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
       
  5912   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  5697   match(Set dst (AddVB src (LoadVector mem)));
  5913   match(Set dst (AddVB src (LoadVector mem)));
  5698   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  5914   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  5699   ins_encode %{
  5915   ins_encode %{
  5700     int vector_len = 0;
  5916     int vector_len = 0;
  5701     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5917     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5702   %}
  5918   %}
  5703   ins_pipe( pipe_slow );
  5919   ins_pipe( pipe_slow );
  5704 %}
  5920 %}
  5705 
  5921 
  5706 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  5922 instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  5707   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  5923   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
  5924   match(Set dst (AddVB src (LoadVector mem)));
       
  5925   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
       
  5926   ins_encode %{
       
  5927     int vector_len = 0;
       
  5928     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  5929   %}
       
  5930   ins_pipe( pipe_slow );
       
  5931 %}
       
  5932 
       
  5933 instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
       
  5934   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  5935   match(Set dst (AddVB dst (LoadVector mem)));
       
  5936   effect(TEMP src);
       
  5937   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
       
  5938   ins_encode %{
       
  5939     int vector_len = 0;
       
  5940     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  5941   %}
       
  5942   ins_pipe( pipe_slow );
       
  5943 %}
       
  5944 
       
  5945 instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
       
  5946   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  5708   match(Set dst (AddVB src1 src2));
  5947   match(Set dst (AddVB src1 src2));
  5709   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  5948   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  5710   ins_encode %{
  5949   ins_encode %{
  5711     int vector_len = 1;
  5950     int vector_len = 1;
  5712     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5951     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5713   %}
  5952   %}
  5714   ins_pipe( pipe_slow );
  5953   ins_pipe( pipe_slow );
  5715 %}
  5954 %}
  5716 
  5955 
  5717 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  5956 instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  5718   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  5957   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
       
  5958   match(Set dst (AddVB src1 src2));
       
  5959   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
       
  5960   ins_encode %{
       
  5961     int vector_len = 1;
       
  5962     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  5963   %}
       
  5964   ins_pipe( pipe_slow );
       
  5965 %}
       
  5966 
       
  5967 instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
       
  5968   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
       
  5969   match(Set dst (AddVB dst src2));
       
  5970   effect(TEMP src1);
       
  5971   format %{ "vpaddb  $dst,$dst,$src2\t! add packed32B" %}
       
  5972   ins_encode %{
       
  5973     int vector_len = 1;
       
  5974     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  5975   %}
       
  5976   ins_pipe( pipe_slow );
       
  5977 %}
       
  5978 
       
  5979 instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
       
  5980   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  5719   match(Set dst (AddVB src (LoadVector mem)));
  5981   match(Set dst (AddVB src (LoadVector mem)));
  5720   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  5982   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  5721   ins_encode %{
  5983   ins_encode %{
  5722     int vector_len = 1;
  5984     int vector_len = 1;
  5723     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5985     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5724   %}
  5986   %}
  5725   ins_pipe( pipe_slow );
  5987   ins_pipe( pipe_slow );
  5726 %}
  5988 %}
  5727 
  5989 
       
  5990 instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
       
  5991   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
       
  5992   match(Set dst (AddVB src (LoadVector mem)));
       
  5993   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
       
  5994   ins_encode %{
       
  5995     int vector_len = 1;
       
  5996     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  5997   %}
       
  5998   ins_pipe( pipe_slow );
       
  5999 %}
       
  6000 
       
  6001 instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
       
  6002   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
       
  6003   match(Set dst (AddVB dst (LoadVector mem)));
       
  6004   effect(TEMP src);
       
  6005   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
       
  6006   ins_encode %{
       
  6007     int vector_len = 1;
       
  6008     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6009   %}
       
  6010   ins_pipe( pipe_slow );
       
  6011 %}
       
  6012 
  5728 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  6013 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  5729   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  6014   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  5730   match(Set dst (AddVB src1 src2));
  6015   match(Set dst (AddVB src1 src2));
  5731   format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
  6016   format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
  5732   ins_encode %{
  6017   ins_encode %{
  5733     int vector_len = 2;
  6018     int vector_len = 2;
  5734     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6019     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5735   %}
  6020   %}
  5736   ins_pipe( pipe_slow );
  6021   ins_pipe( pipe_slow );
  5737 %}
  6022 %}
  5738 
  6023 
  5739 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  6024 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  5740   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  6025   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  5741   match(Set dst (AddVB src (LoadVector mem)));
  6026   match(Set dst (AddVB src (LoadVector mem)));
  5742   format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
  6027   format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
  5743   ins_encode %{
  6028   ins_encode %{
  5744     int vector_len = 2;
  6029     int vector_len = 2;
  5745     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6030     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5746   %}
  6031   %}
  5747   ins_pipe( pipe_slow );
  6032   ins_pipe( pipe_slow );
  5748 %}
  6033 %}
  5749 
  6034 
  5750 // Shorts/Chars vector add
  6035 // Shorts/Chars vector add
  5751 instruct vadd2S(vecS dst, vecS src) %{
  6036 instruct vadd2S(vecS dst, vecS src) %{
  5752   predicate(n->as_Vector()->length() == 2);
  6037   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  5753   match(Set dst (AddVS dst src));
  6038   match(Set dst (AddVS dst src));
  5754   format %{ "paddw   $dst,$src\t! add packed2S" %}
  6039   format %{ "paddw   $dst,$src\t! add packed2S" %}
  5755   ins_encode %{
  6040   ins_encode %{
  5756     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  6041     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  5757   %}
  6042   %}
  5758   ins_pipe( pipe_slow );
  6043   ins_pipe( pipe_slow );
  5759 %}
  6044 %}
  5760 
  6045 
  5761 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  6046 instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  5762   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  6047   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  5763   match(Set dst (AddVS src1 src2));
  6048   match(Set dst (AddVS src1 src2));
  5764   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  6049   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  5765   ins_encode %{
  6050   ins_encode %{
  5766     int vector_len = 0;
  6051     int vector_len = 0;
  5767     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6052     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5768   %}
  6053   %}
  5769   ins_pipe( pipe_slow );
  6054   ins_pipe( pipe_slow );
  5770 %}
  6055 %}
  5771 
  6056 
  5772 instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
  6057 instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  5773   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  6058   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
       
  6059   match(Set dst (AddVS src1 src2));
       
  6060   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
       
  6061   ins_encode %{
       
  6062     int vector_len = 0;
       
  6063     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6064   %}
       
  6065   ins_pipe( pipe_slow );
       
  6066 %}
       
  6067 
       
  6068 instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
       
  6069   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
       
  6070   match(Set dst (AddVS dst src2));
       
  6071   effect(TEMP src1);
       
  6072   format %{ "vpaddw  $dst,$dst,$src2\t! add packed2S" %}
       
  6073   ins_encode %{
       
  6074     int vector_len = 0;
       
  6075     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6076   %}
       
  6077   ins_pipe( pipe_slow );
       
  6078 %}
       
  6079 
       
  6080 instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
       
  6081   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  5774   match(Set dst (AddVS src (LoadVector mem)));
  6082   match(Set dst (AddVS src (LoadVector mem)));
  5775   format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
  6083   format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
  5776   ins_encode %{
  6084   ins_encode %{
  5777     int vector_len = 0;
  6085     int vector_len = 0;
  5778     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6086     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5779   %}
  6087   %}
  5780   ins_pipe( pipe_slow );
  6088   ins_pipe( pipe_slow );
  5781 %}
  6089 %}
  5782 
  6090 
       
  6091 instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
       
  6092   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
       
  6093   match(Set dst (AddVS src (LoadVector mem)));
       
  6094   format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
       
  6095   ins_encode %{
       
  6096     int vector_len = 0;
       
  6097     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6098   %}
       
  6099   ins_pipe( pipe_slow );
       
  6100 %}
       
  6101 
       
  6102 instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
       
  6103   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
       
  6104   match(Set dst (AddVS dst (LoadVector mem)));
       
  6105   effect(TEMP src);
       
  6106   format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
       
  6107   ins_encode %{
       
  6108     int vector_len = 0;
       
  6109     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6110   %}
       
  6111   ins_pipe( pipe_slow );
       
  6112 %}
       
  6113 
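// Editor's note: vadd2S above is the template for every Shorts/Chars size
// that follows. Each size ships as a triple of register rules and a triple
// of memory rules, split by predicate: *_avx for VEX-only hardware, *_evex
// for AVX-512BW hardware, and *_evex_special for EVEX hardware without BW.
// The _special variants match the two-operand form, e.g.
//
//   match(Set dst (AddVS dst src2));   // dst doubles as the first source
//   effect(TEMP src1);                 // scratch XMM for the encoder
//
// A plausible reading, hedged: BW-less AVX-512 parts lack EVEX encodings
// for byte/word element sizes, so tying dst to the first input keeps these
// rules encodable without the three-operand EVEX form.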
  5783 instruct vadd4S(vecD dst, vecD src) %{
  6114 instruct vadd4S(vecD dst, vecD src) %{
  5784   predicate(n->as_Vector()->length() == 4);
  6115   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  5785   match(Set dst (AddVS dst src));
  6116   match(Set dst (AddVS dst src));
  5786   format %{ "paddw   $dst,$src\t! add packed4S" %}
  6117   format %{ "paddw   $dst,$src\t! add packed4S" %}
  5787   ins_encode %{
  6118   ins_encode %{
  5788     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  6119     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  5789   %}
  6120   %}
  5790   ins_pipe( pipe_slow );
  6121   ins_pipe( pipe_slow );
  5791 %}
  6122 %}
  5792 
  6123 
  5793 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  6124 instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  5794   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  6125   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  5795   match(Set dst (AddVS src1 src2));
  6126   match(Set dst (AddVS src1 src2));
  5796   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  6127   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  5797   ins_encode %{
  6128   ins_encode %{
  5798     int vector_len = 0;
  6129     int vector_len = 0;
  5799     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6130     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5800   %}
  6131   %}
  5801   ins_pipe( pipe_slow );
  6132   ins_pipe( pipe_slow );
  5802 %}
  6133 %}
  5803 
  6134 
  5804 instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
  6135 instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  5805   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  6136   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
  6137   match(Set dst (AddVS src1 src2));
       
  6138   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
       
  6139   ins_encode %{
       
  6140     int vector_len = 0;
       
  6141     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6142   %}
       
  6143   ins_pipe( pipe_slow );
       
  6144 %}
       
  6145 
       
  6146 instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
       
  6147   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  6148   match(Set dst (AddVS dst src2));
       
  6149   effect(TEMP src1);
       
  6150   format %{ "vpaddw  $dst,$dst,$src2\t! add packed4S" %}
       
  6151   ins_encode %{
       
  6152     int vector_len = 0;
       
  6153     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6154   %}
       
  6155   ins_pipe( pipe_slow );
       
  6156 %}
       
  6157 
       
  6158 instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
       
  6159   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  5806   match(Set dst (AddVS src (LoadVector mem)));
  6160   match(Set dst (AddVS src (LoadVector mem)));
  5807   format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
  6161   format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
  5808   ins_encode %{
  6162   ins_encode %{
  5809     int vector_len = 0;
  6163     int vector_len = 0;
  5810     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6164     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5811   %}
  6165   %}
  5812   ins_pipe( pipe_slow );
  6166   ins_pipe( pipe_slow );
  5813 %}
  6167 %}
  5814 
  6168 
       
  6169 instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
       
  6170   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
  6171   match(Set dst (AddVS src (LoadVector mem)));
       
  6172   format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
       
  6173   ins_encode %{
       
  6174     int vector_len = 0;
       
  6175     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6176   %}
       
  6177   ins_pipe( pipe_slow );
       
  6178 %}
       
  6179 
       
  6180 instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
       
  6181   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  6182   match(Set dst (AddVS dst (LoadVector mem)));
       
  6183   effect(TEMP src);
       
  6184   format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
       
  6185   ins_encode %{
       
  6186     int vector_len = 0;
       
  6187     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6188   %}
       
  6189   ins_pipe( pipe_slow );
       
  6190 %}
       
  6191 
  5815 instruct vadd8S(vecX dst, vecX src) %{
  6192 instruct vadd8S(vecX dst, vecX src) %{
  5816   predicate(n->as_Vector()->length() == 8);
  6193   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  5817   match(Set dst (AddVS dst src));
  6194   match(Set dst (AddVS dst src));
  5818   format %{ "paddw   $dst,$src\t! add packed8S" %}
  6195   format %{ "paddw   $dst,$src\t! add packed8S" %}
  5819   ins_encode %{
  6196   ins_encode %{
  5820     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  6197     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  5821   %}
  6198   %}
  5822   ins_pipe( pipe_slow );
  6199   ins_pipe( pipe_slow );
  5823 %}
  6200 %}
  5824 
  6201 
  5825 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  6202 instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  5826   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  6203   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  5827   match(Set dst (AddVS src1 src2));
  6204   match(Set dst (AddVS src1 src2));
  5828   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  6205   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  5829   ins_encode %{
  6206   ins_encode %{
  5830     int vector_len = 0;
  6207     int vector_len = 0;
  5831     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6208     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5832   %}
  6209   %}
  5833   ins_pipe( pipe_slow );
  6210   ins_pipe( pipe_slow );
  5834 %}
  6211 %}
  5835 
  6212 
  5836 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  6213 instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  5837   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  6214   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
  6215   match(Set dst (AddVS src1 src2));
       
  6216   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
       
  6217   ins_encode %{
       
  6218     int vector_len = 0;
       
  6219     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6220   %}
       
  6221   ins_pipe( pipe_slow );
       
  6222 %}
       
  6223 
       
  6224 instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
       
  6225   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  6226   match(Set dst (AddVS dst src2));
       
  6227   effect(TEMP src1);
       
  6228   format %{ "vpaddw  $dst,$dst,$src2\t! add packed8S" %}
       
  6229   ins_encode %{
       
  6230     int vector_len = 0;
       
  6231     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6232   %}
       
  6233   ins_pipe( pipe_slow );
       
  6234 %}
       
  6235 
       
  6236 instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
       
  6237   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  5838   match(Set dst (AddVS src (LoadVector mem)));
  6238   match(Set dst (AddVS src (LoadVector mem)));
  5839   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  6239   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  5840   ins_encode %{
  6240   ins_encode %{
  5841     int vector_len = 0;
  6241     int vector_len = 0;
  5842     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6242     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5843   %}
  6243   %}
  5844   ins_pipe( pipe_slow );
  6244   ins_pipe( pipe_slow );
  5845 %}
  6245 %}
  5846 
  6246 
  5847 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  6247 instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  5848   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  6248   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
  6249   match(Set dst (AddVS src (LoadVector mem)));
       
  6250   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
       
  6251   ins_encode %{
       
  6252     int vector_len = 0;
       
  6253     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6254   %}
       
  6255   ins_pipe( pipe_slow );
       
  6256 %}
       
  6257 
       
  6258 instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
       
  6259   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  6260   match(Set dst (AddVS dst (LoadVector mem)));
       
  6261   effect(TEMP src);
       
  6262   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
       
  6263   ins_encode %{
       
  6264     int vector_len = 0;
       
  6265     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6266   %}
       
  6267   ins_pipe( pipe_slow );
       
  6268 %}
       
  6269 
       
  6270 instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
       
  6271   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  5849   match(Set dst (AddVS src1 src2));
  6272   match(Set dst (AddVS src1 src2));
  5850   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  6273   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  5851   ins_encode %{
  6274   ins_encode %{
  5852     int vector_len = 1;
  6275     int vector_len = 1;
  5853     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6276     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5854   %}
  6277   %}
  5855   ins_pipe( pipe_slow );
  6278   ins_pipe( pipe_slow );
  5856 %}
  6279 %}
  5857 
  6280 
  5858 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  6281 instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  5859   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  6282   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
  6283   match(Set dst (AddVS src1 src2));
       
  6284   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
       
  6285   ins_encode %{
       
  6286     int vector_len = 1;
       
  6287     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6288   %}
       
  6289   ins_pipe( pipe_slow );
       
  6290 %}
       
  6291 
       
  6292 instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
       
  6293   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  6294   match(Set dst (AddVS dst src2));
       
  6295   effect(TEMP src1);
       
  6296   format %{ "vpaddw  $dst,$dst,$src2\t! add packed16S" %}
       
  6297   ins_encode %{
       
  6298     int vector_len = 1;
       
  6299     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6300   %}
       
  6301   ins_pipe( pipe_slow );
       
  6302 %}
       
  6303 
       
  6304 instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
       
  6305   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  5860   match(Set dst (AddVS src (LoadVector mem)));
  6306   match(Set dst (AddVS src (LoadVector mem)));
  5861   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  6307   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  5862   ins_encode %{
  6308   ins_encode %{
  5863     int vector_len = 1;
  6309     int vector_len = 1;
  5864     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6310     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  5865   %}
  6311   %}
  5866   ins_pipe( pipe_slow );
  6312   ins_pipe( pipe_slow );
  5867 %}
  6313 %}
  5868 
  6314 
       
  6315 instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
       
  6316   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
  6317   match(Set dst (AddVS src (LoadVector mem)));
       
  6318   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
       
  6319   ins_encode %{
       
  6320     int vector_len = 1;
       
  6321     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6322   %}
       
  6323   ins_pipe( pipe_slow );
       
  6324 %}
       
  6325 
       
  6326 instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
       
  6327   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  6328   match(Set dst (AddVS dst (LoadVector mem)));
       
  6329   effect(TEMP src);
       
  6330   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
       
  6331   ins_encode %{
       
  6332     int vector_len = 1;
       
  6333     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6334   %}
       
  6335   ins_pipe( pipe_slow );
       
  6336 %}
       
  6337 
  5869 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  6338 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  5870   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  6339   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  5871   match(Set dst (AddVS src1 src2));
  6340   match(Set dst (AddVS src1 src2));
  5872   format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
  6341   format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
  5873   ins_encode %{
  6342   ins_encode %{
  5874     int vector_len = 2;
  6343     int vector_len = 2;
  5875     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6344     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  5876   %}
  6345   %}
  5877   ins_pipe( pipe_slow );
  6346   ins_pipe( pipe_slow );
  5878 %}
  6347 %}
  5879 
  6348 
  5880 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  6349 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  5881   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  6350   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  5882   match(Set dst (AddVS src (LoadVector mem)));
  6351   match(Set dst (AddVS src (LoadVector mem)));
  5883   format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
  6352   format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
  5884   ins_encode %{
  6353   ins_encode %{
  5885     int vector_len = 2;
  6354     int vector_len = 2;
  5886     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6355     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6262 
  6731 
  6263 // --------------------------------- SUB --------------------------------------
  6732 // --------------------------------- SUB --------------------------------------
  6264 
  6733 
  6265 // Bytes vector sub
  6734 // Bytes vector sub
  6266 instruct vsub4B(vecS dst, vecS src) %{
  6735 instruct vsub4B(vecS dst, vecS src) %{
  6267   predicate(n->as_Vector()->length() == 4);
  6736   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  6268   match(Set dst (SubVB dst src));
  6737   match(Set dst (SubVB dst src));
  6269   format %{ "psubb   $dst,$src\t! sub packed4B" %}
  6738   format %{ "psubb   $dst,$src\t! sub packed4B" %}
  6270   ins_encode %{
  6739   ins_encode %{
  6271     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  6740     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  6272   %}
  6741   %}
  6273   ins_pipe( pipe_slow );
  6742   ins_pipe( pipe_slow );
  6274 %}
  6743 %}
  6275 
  6744 
  6276 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  6745 instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  6277   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  6746   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  6278   match(Set dst (SubVB src1 src2));
  6747   match(Set dst (SubVB src1 src2));
  6279   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  6748   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  6280   ins_encode %{
  6749   ins_encode %{
  6281     int vector_len = 0;
  6750     int vector_len = 0;
  6282     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6751     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6283   %}
  6752   %}
  6284   ins_pipe( pipe_slow );
  6753   ins_pipe( pipe_slow );
  6285 %}
  6754 %}
  6286 
  6755 
  6287 instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
  6756 instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  6288   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  6757   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
  6758   match(Set dst (SubVB src1 src2));
       
  6759   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
       
  6760   ins_encode %{
       
  6761     int vector_len = 0;
       
  6762     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6763   %}
       
  6764   ins_pipe( pipe_slow );
       
  6765 %}
       
  6766 
       
  6767 instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
       
  6768   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  6769   match(Set dst (SubVB dst src2));
       
  6770   effect(TEMP src1);
       
  6771   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
       
  6772   ins_encode %{
       
  6773     int vector_len = 0;
       
  6774     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6775   %}
       
  6776   ins_pipe( pipe_slow );
       
  6777 %}
       
  6778 
       
  6779 instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
       
  6780   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  6289   match(Set dst (SubVB src (LoadVector mem)));
  6781   match(Set dst (SubVB src (LoadVector mem)));
  6290   format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
  6782   format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
  6291   ins_encode %{
  6783   ins_encode %{
  6292     int vector_len = 0;
  6784     int vector_len = 0;
  6293     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6785     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6294   %}
  6786   %}
  6295   ins_pipe( pipe_slow );
  6787   ins_pipe( pipe_slow );
  6296 %}
  6788 %}
  6297 
  6789 
       
  6790 instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
       
  6791   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
  6792   match(Set dst (SubVB src (LoadVector mem)));
       
  6793   format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
       
  6794   ins_encode %{
       
  6795     int vector_len = 0;
       
  6796     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6797   %}
       
  6798   ins_pipe( pipe_slow );
       
  6799 %}
       
  6800 
       
  6801 instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
       
  6802   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  6803   match(Set dst (SubVB dst (LoadVector mem)));
       
  6804   effect(TEMP src);
       
  6805   format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
       
  6806   ins_encode %{
       
  6807     int vector_len = 0;
       
  6808     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6809   %}
       
  6810   ins_pipe( pipe_slow );
       
  6811 %}
       
  6812 
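// Editor's note on effect(TEMP ...): in ADLC a TEMP operand is a scratch
// register that counts as defined and used by the instruction itself, so
// the register allocator must pick a register that overlaps neither the
// live inputs nor the result, for example:
//
//   effect(TEMP src1);   // allocator reserves a scratch XMM as src1
//
// That is how the _evex_special rules can legally name src/src1 in their
// encode blocks even though those operands never appear in the match tree.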
  6298 instruct vsub8B(vecD dst, vecD src) %{
  6813 instruct vsub8B(vecD dst, vecD src) %{
  6299   predicate(n->as_Vector()->length() == 8);
  6814   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  6300   match(Set dst (SubVB dst src));
  6815   match(Set dst (SubVB dst src));
  6301   format %{ "psubb   $dst,$src\t! sub packed8B" %}
  6816   format %{ "psubb   $dst,$src\t! sub packed8B" %}
  6302   ins_encode %{
  6817   ins_encode %{
  6303     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  6818     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  6304   %}
  6819   %}
  6305   ins_pipe( pipe_slow );
  6820   ins_pipe( pipe_slow );
  6306 %}
  6821 %}
  6307 
  6822 
  6308 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  6823 instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  6309   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  6824   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  6310   match(Set dst (SubVB src1 src2));
  6825   match(Set dst (SubVB src1 src2));
  6311   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  6826   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  6312   ins_encode %{
  6827   ins_encode %{
  6313     int vector_len = 0;
  6828     int vector_len = 0;
  6314     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6829     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6315   %}
  6830   %}
  6316   ins_pipe( pipe_slow );
  6831   ins_pipe( pipe_slow );
  6317 %}
  6832 %}
  6318 
  6833 
  6319 instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
  6834 instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  6320   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  6835   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
  6836   match(Set dst (SubVB src1 src2));
       
  6837   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
       
  6838   ins_encode %{
       
  6839     int vector_len = 0;
       
  6840     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6841   %}
       
  6842   ins_pipe( pipe_slow );
       
  6843 %}
       
  6844 
       
  6845 instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
       
  6846   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  6847   match(Set dst (SubVB dst src2));
       
  6848   effect(TEMP src1);
       
  6849   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
       
  6850   ins_encode %{
       
  6851     int vector_len = 0;
       
  6852     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6853   %}
       
  6854   ins_pipe( pipe_slow );
       
  6855 %}
       
  6856 
       
  6857 instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
       
  6858   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  6321   match(Set dst (SubVB src (LoadVector mem)));
  6859   match(Set dst (SubVB src (LoadVector mem)));
  6322   format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
  6860   format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
  6323   ins_encode %{
  6861   ins_encode %{
  6324     int vector_len = 0;
  6862     int vector_len = 0;
  6325     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6863     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6326   %}
  6864   %}
  6327   ins_pipe( pipe_slow );
  6865   ins_pipe( pipe_slow );
  6328 %}
  6866 %}
  6329 
  6867 
       
  6868 instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
       
  6869   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
  6870   match(Set dst (SubVB src (LoadVector mem)));
       
  6871   format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
       
  6872   ins_encode %{
       
  6873     int vector_len = 0;
       
  6874     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6875   %}
       
  6876   ins_pipe( pipe_slow );
       
  6877 %}
       
  6878 
       
  6879 instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
       
  6880   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  6881   match(Set dst (SubVB dst (LoadVector mem)));
       
  6882   effect(TEMP src);
       
  6883   format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
       
  6884   ins_encode %{
       
  6885     int vector_len = 0;
       
  6886     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6887   %}
       
  6888   ins_pipe( pipe_slow );
       
  6889 %}
       
  6890 
  6330 instruct vsub16B(vecX dst, vecX src) %{
  6891 instruct vsub16B(vecX dst, vecX src) %{
  6331   predicate(n->as_Vector()->length() == 16);
  6892   predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  6332   match(Set dst (SubVB dst src));
  6893   match(Set dst (SubVB dst src));
  6333   format %{ "psubb   $dst,$src\t! sub packed16B" %}
  6894   format %{ "psubb   $dst,$src\t! sub packed16B" %}
  6334   ins_encode %{
  6895   ins_encode %{
  6335     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  6896     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  6336   %}
  6897   %}
  6337   ins_pipe( pipe_slow );
  6898   ins_pipe( pipe_slow );
  6338 %}
  6899 %}
  6339 
  6900 
  6340 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  6901 instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  6341   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  6902   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  6342   match(Set dst (SubVB src1 src2));
  6903   match(Set dst (SubVB src1 src2));
  6343   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  6904   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  6344   ins_encode %{
  6905   ins_encode %{
  6345     int vector_len = 0;
  6906     int vector_len = 0;
  6346     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6907     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6347   %}
  6908   %}
  6348   ins_pipe( pipe_slow );
  6909   ins_pipe( pipe_slow );
  6349 %}
  6910 %}
  6350 
  6911 
  6351 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  6912 instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  6352   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  6913   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
  6914   match(Set dst (SubVB src1 src2));
       
  6915   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
       
  6916   ins_encode %{
       
  6917     int vector_len = 0;
       
  6918     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6919   %}
       
  6920   ins_pipe( pipe_slow );
       
  6921 %}
       
  6922 
       
  6923 instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
       
  6924   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  6925   match(Set dst (SubVB dst src2));
       
  6926   effect(TEMP src1);
       
  6927   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
       
  6928   ins_encode %{
       
  6929     int vector_len = 0;
       
  6930     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6931   %}
       
  6932   ins_pipe( pipe_slow );
       
  6933 %}
       
  6934 
       
  6935 instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
       
  6936   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  6353   match(Set dst (SubVB src (LoadVector mem)));
  6937   match(Set dst (SubVB src (LoadVector mem)));
  6354   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
  6938   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
  6355   ins_encode %{
  6939   ins_encode %{
  6356     int vector_len = 0;
  6940     int vector_len = 0;
  6357     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6941     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6358   %}
  6942   %}
  6359   ins_pipe( pipe_slow );
  6943   ins_pipe( pipe_slow );
  6360 %}
  6944 %}
  6361 
  6945 
  6362 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  6946 instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  6363   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  6947   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
  6948   match(Set dst (SubVB src (LoadVector mem)));
       
  6949   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
       
  6950   ins_encode %{
       
  6951     int vector_len = 0;
       
  6952     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6953   %}
       
  6954   ins_pipe( pipe_slow );
       
  6955 %}
       
  6956 
       
  6957 instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
       
  6958   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  6959   match(Set dst (SubVB dst (LoadVector mem)));
       
  6960   effect(TEMP src);
       
  6961   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
       
  6962   ins_encode %{
       
  6963     int vector_len = 0;
       
  6964     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  6965   %}
       
  6966   ins_pipe( pipe_slow );
       
  6967 %}
       
  6968 
       
  6969 instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
       
  6970   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  6364   match(Set dst (SubVB src1 src2));
  6971   match(Set dst (SubVB src1 src2));
  6365   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
  6972   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
  6366   ins_encode %{
  6973   ins_encode %{
  6367     int vector_len = 1;
  6974     int vector_len = 1;
  6368     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6975     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6369   %}
  6976   %}
  6370   ins_pipe( pipe_slow );
  6977   ins_pipe( pipe_slow );
  6371 %}
  6978 %}
  6372 
  6979 
  6373 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  6980 instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  6374   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  6981   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
       
  6982   match(Set dst (SubVB src1 src2));
       
  6983   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
       
  6984   ins_encode %{
       
  6985     int vector_len = 1;
       
  6986     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6987   %}
       
  6988   ins_pipe( pipe_slow );
       
  6989 %}
       
  6990 
       
  6991 instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
       
  6992   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
       
  6993   match(Set dst (SubVB dst src2));
       
  6994   effect(TEMP src1);
       
  6995   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
       
  6996   ins_encode %{
       
  6997     int vector_len = 1;
       
  6998     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  6999   %}
       
  7000   ins_pipe( pipe_slow );
       
  7001 %}
       
  7002 
       
  7003 instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
       
  7004   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  6375   match(Set dst (SubVB src (LoadVector mem)));
  7005   match(Set dst (SubVB src (LoadVector mem)));
  6376   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
  7006   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
  6377   ins_encode %{
  7007   ins_encode %{
  6378     int vector_len = 1;
  7008     int vector_len = 1;
  6379     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7009     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6380   %}
  7010   %}
  6381   ins_pipe( pipe_slow );
  7011   ins_pipe( pipe_slow );
  6382 %}
  7012 %}
  6383 
  7013 
       
  7014 instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
       
  7015   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
       
  7016   match(Set dst (SubVB src (LoadVector mem)));
       
  7017   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
       
  7018   ins_encode %{
       
  7019     int vector_len = 1;
       
  7020     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7021   %}
       
  7022   ins_pipe( pipe_slow );
       
  7023 %}
       
  7024 
       
  7025 instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
       
  7026   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
       
  7027   match(Set dst (SubVB dst (LoadVector mem)));
       
  7028   effect(TEMP src);
       
  7029   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
       
  7030   ins_encode %{
       
  7031     int vector_len = 1;
       
  7032     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7033   %}
       
  7034   ins_pipe( pipe_slow );
       
  7035 %}
       
  7036 
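// Editor's note: the packed64B rules below are gated on supports_avx512bw()
// because 512-bit byte and word arithmetic (vpsubb, vpsubw, ...) comes from
// the AVX-512BW extension; base AVX-512F only covers dword/qword elements.
// On BW-less parts byte vectors therefore stay at 32 elements at most.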
  6384 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  7037 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  6385   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  7038   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  6386   match(Set dst (SubVB src1 src2));
  7039   match(Set dst (SubVB src1 src2));
  6387   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed64B" %}
  7040   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed64B" %}
  6388   ins_encode %{
  7041   ins_encode %{
  6389     int vector_len = 2;
  7042     int vector_len = 2;
  6390     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7043     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6391   %}
  7044   %}
  6392   ins_pipe( pipe_slow );
  7045   ins_pipe( pipe_slow );
  6393 %}
  7046 %}
  6394 
  7047 
  6395 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  7048 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  6396   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  7049   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  6397   match(Set dst (SubVB src (LoadVector mem)));
  7050   match(Set dst (SubVB src (LoadVector mem)));
  6398   format %{ "vpsubb  $dst,$src,$mem\t! sub packed64B" %}
  7051   format %{ "vpsubb  $dst,$src,$mem\t! sub packed64B" %}
  6399   ins_encode %{
  7052   ins_encode %{
  6400     int vector_len = 2;
  7053     int vector_len = 2;
  6401     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7054     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6402   %}
  7055   %}
  6403   ins_pipe( pipe_slow );
  7056   ins_pipe( pipe_slow );
  6404 %}
  7057 %}
  6405 
  7058 
  6406 // Shorts/Chars vector sub
  7059 // Shorts/Chars vector sub
  6407 instruct vsub2S(vecS dst, vecS src) %{
  7060 instruct vsub2S(vecS dst, vecS src) %{
  6408   predicate(n->as_Vector()->length() == 2);
  7061   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  6409   match(Set dst (SubVS dst src));
  7062   match(Set dst (SubVS dst src));
  6410   format %{ "psubw   $dst,$src\t! sub packed2S" %}
  7063   format %{ "psubw   $dst,$src\t! sub packed2S" %}
  6411   ins_encode %{
  7064   ins_encode %{
  6412     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  7065     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  6413   %}
  7066   %}
  6414   ins_pipe( pipe_slow );
  7067   ins_pipe( pipe_slow );
  6415 %}
  7068 %}
  6416 
  7069 
  6417 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  7070 instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  6418   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  7071   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  6419   match(Set dst (SubVS src1 src2));
  7072   match(Set dst (SubVS src1 src2));
  6420   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
  7073   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
  6421   ins_encode %{
  7074   ins_encode %{
  6422     int vector_len = 0;
  7075     int vector_len = 0;
  6423     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7076     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6424   %}
  7077   %}
  6425   ins_pipe( pipe_slow );
  7078   ins_pipe( pipe_slow );
  6426 %}
  7079 %}
  6427 
  7080 
  6428 instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
  7081 instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  6429   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  7082   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
       
  7083   match(Set dst (SubVS src1 src2));
       
  7084   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
       
  7085   ins_encode %{
       
  7086     int vector_len = 0;
       
  7087     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7088   %}
       
  7089   ins_pipe( pipe_slow );
       
  7090 %}
       
  7091 
       
  7092 instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
       
  7093   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
       
  7094   match(Set dst (SubVS dst src2));
       
  7095   effect(TEMP src1);
       
  7096   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
       
  7097   ins_encode %{
       
  7098     int vector_len = 0;
       
  7099     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7100   %}
       
  7101   ins_pipe( pipe_slow );
       
  7102 %}
       
  7103 
       
  7104 instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
       
  7105   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  6430   match(Set dst (SubVS src (LoadVector mem)));
  7106   match(Set dst (SubVS src (LoadVector mem)));
  6431   format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
  7107   format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
  6432   ins_encode %{
  7108   ins_encode %{
  6433     int vector_len = 0;
  7109     int vector_len = 0;
  6434     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7110     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6435   %}
  7111   %}
  6436   ins_pipe( pipe_slow );
  7112   ins_pipe( pipe_slow );
  6437 %}
  7113 %}
  6438 
  7114 
       
  7115 instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
       
  7116   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
       
  7117   match(Set dst (SubVS src (LoadVector mem)));
       
  7118   format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
       
  7119   ins_encode %{
       
  7120     int vector_len = 0;
       
  7121     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7122   %}
       
  7123   ins_pipe( pipe_slow );
       
  7124 %}
       
  7125 
       
  7126 instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
       
  7127   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
       
  7128   match(Set dst (SubVS dst (LoadVector mem)));
       
  7129   effect(TEMP src);
       
  7130   format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
       
  7131   ins_encode %{
       
  7132     int vector_len = 0;
       
  7133     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7134   %}
       
  7135   ins_pipe( pipe_slow );
       
  7136 %}
       
  7137 
  6439 instruct vsub4S(vecD dst, vecD src) %{
  7138 instruct vsub4S(vecD dst, vecD src) %{
  6440   predicate(n->as_Vector()->length() == 4);
  7139   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  6441   match(Set dst (SubVS dst src));
  7140   match(Set dst (SubVS dst src));
  6442   format %{ "psubw   $dst,$src\t! sub packed4S" %}
  7141   format %{ "psubw   $dst,$src\t! sub packed4S" %}
  6443   ins_encode %{
  7142   ins_encode %{
  6444     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  7143     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  6445   %}
  7144   %}
  6446   ins_pipe( pipe_slow );
  7145   ins_pipe( pipe_slow );
  6447 %}
  7146 %}
  6448 
  7147 
  6449 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  7148 instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  6450   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  7149   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  6451   match(Set dst (SubVS src1 src2));
  7150   match(Set dst (SubVS src1 src2));
  6452   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
  7151   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
  6453   ins_encode %{
  7152   ins_encode %{
  6454     int vector_len = 0;
  7153     int vector_len = 0;
  6455     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7154     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6456   %}
  7155   %}
  6457   ins_pipe( pipe_slow );
  7156   ins_pipe( pipe_slow );
  6458 %}
  7157 %}
  6459 
  7158 
  6460 instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
  7159 instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  6461   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  7160   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
  7161   match(Set dst (SubVS src1 src2));
       
  7162   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
       
  7163   ins_encode %{
       
  7164     int vector_len = 0;
       
  7165     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7166   %}
       
  7167   ins_pipe( pipe_slow );
       
  7168 %}
       
  7169 
       
  7170 instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
       
  7171   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  7172   match(Set dst (SubVS dst src2));
       
  7173   effect(TEMP src1);
       
  7174   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
       
  7175   ins_encode %{
       
  7176     int vector_len = 0;
       
  7177     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7178   %}
       
  7179   ins_pipe( pipe_slow );
       
  7180 %}
       
  7181 
       
  7182 instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
       
  7183   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  6462   match(Set dst (SubVS src (LoadVector mem)));
  7184   match(Set dst (SubVS src (LoadVector mem)));
  6463   format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
  7185   format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
  6464   ins_encode %{
  7186   ins_encode %{
  6465     int vector_len = 0;
  7187     int vector_len = 0;
  6466     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7188     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6467   %}
  7189   %}
  6468   ins_pipe( pipe_slow );
  7190   ins_pipe( pipe_slow );
  6469 %}
  7191 %}
  6470 
  7192 
       
  7193 instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
       
  7194   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
  7195   match(Set dst (SubVS src (LoadVector mem)));
       
  7196   format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
       
  7197   ins_encode %{
       
  7198     int vector_len = 0;
       
  7199     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7200   %}
       
  7201   ins_pipe( pipe_slow );
       
  7202 %}
       
  7203 
       
  7204 instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
       
  7205   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  7206   match(Set dst (SubVS dst (LoadVector mem)));
       
  7207   effect(TEMP src);
       
  7208   format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
       
  7209   ins_encode %{
       
  7210     int vector_len = 0;
       
  7211     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7212   %}
       
  7213   ins_pipe( pipe_slow );
       
  7214 %}
       
  7215 
  6471 instruct vsub8S(vecX dst, vecX src) %{
  7216 instruct vsub8S(vecX dst, vecX src) %{
  6472   predicate(n->as_Vector()->length() == 8);
  7217   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  6473   match(Set dst (SubVS dst src));
  7218   match(Set dst (SubVS dst src));
  6474   format %{ "psubw   $dst,$src\t! sub packed8S" %}
  7219   format %{ "psubw   $dst,$src\t! sub packed8S" %}
  6475   ins_encode %{
  7220   ins_encode %{
  6476     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  7221     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  6477   %}
  7222   %}
  6478   ins_pipe( pipe_slow );
  7223   ins_pipe( pipe_slow );
  6479 %}
  7224 %}
  6480 
  7225 
  6481 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  7226 instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  6482   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  7227   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  6483   match(Set dst (SubVS src1 src2));
  7228   match(Set dst (SubVS src1 src2));
  6484   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
  7229   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
  6485   ins_encode %{
  7230   ins_encode %{
  6486     int vector_len = 0;
  7231     int vector_len = 0;
  6487     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7232     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6488   %}
  7233   %}
  6489   ins_pipe( pipe_slow );
  7234   ins_pipe( pipe_slow );
  6490 %}
  7235 %}
  6491 
  7236 
  6492 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  7237 instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  6493   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  7238   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
  7239   match(Set dst (SubVS src1 src2));
       
  7240   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
       
  7241   ins_encode %{
       
  7242     int vector_len = 0;
       
  7243     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7244   %}
       
  7245   ins_pipe( pipe_slow );
       
  7246 %}
       
  7247 
       
  7248 instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
       
  7249   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  7250   match(Set dst (SubVS dst src2));
       
  7251   effect(TEMP src1);
       
  7252   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
       
  7253   ins_encode %{
       
  7254     int vector_len = 0;
       
  7255     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7256   %}
       
  7257   ins_pipe( pipe_slow );
       
  7258 %}
       
  7259 
       
  7260 instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
       
  7261   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  6494   match(Set dst (SubVS src (LoadVector mem)));
  7262   match(Set dst (SubVS src (LoadVector mem)));
  6495   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
  7263   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
  6496   ins_encode %{
  7264   ins_encode %{
  6497     int vector_len = 0;
  7265     int vector_len = 0;
  6498     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7266     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6499   %}
  7267   %}
  6500   ins_pipe( pipe_slow );
  7268   ins_pipe( pipe_slow );
  6501 %}
  7269 %}
  6502 
  7270 
  6503 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  7271 instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
  6504   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  7272   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
  7273   match(Set dst (SubVS src (LoadVector mem)));
       
  7274   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
       
  7275   ins_encode %{
       
  7276     int vector_len = 0;
       
  7277     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7278   %}
       
  7279   ins_pipe( pipe_slow );
       
  7280 %}
       
  7281 
       
  7282 instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
       
  7283   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  7284   match(Set dst (SubVS dst (LoadVector mem)));
       
  7285   effect(TEMP src);
       
  7286   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
       
  7287   ins_encode %{
       
  7288     int vector_len = 0;
       
  7289     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7290   %}
       
  7291   ins_pipe( pipe_slow );
       
  7292 %}
       
  7293 
       
  7294 instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
       
  7295   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  6505   match(Set dst (SubVS src1 src2));
  7296   match(Set dst (SubVS src1 src2));
  6506   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
  7297   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
  6507   ins_encode %{
  7298   ins_encode %{
  6508     int vector_len = 1;
  7299     int vector_len = 1;
  6509     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7300     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6510   %}
  7301   %}
  6511   ins_pipe( pipe_slow );
  7302   ins_pipe( pipe_slow );
  6512 %}
  7303 %}
  6513 
  7304 
  6514 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  7305 instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  6515   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  7306   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
  7307   match(Set dst (SubVS src1 src2));
       
  7308   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
       
  7309   ins_encode %{
       
  7310     int vector_len = 1;
       
  7311     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7312   %}
       
  7313   ins_pipe( pipe_slow );
       
  7314 %}
       
  7315 
       
  7316 instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
       
  7317   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  7318   match(Set dst (SubVS dst src2));
       
  7319   effect(TEMP src1);
       
  7320   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
       
  7321   ins_encode %{
       
  7322     int vector_len = 1;
       
  7323     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7324   %}
       
  7325   ins_pipe( pipe_slow );
       
  7326 %}
       
  7327 
       
  7328 instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
       
  7329   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  6516   match(Set dst (SubVS src (LoadVector mem)));
  7330   match(Set dst (SubVS src (LoadVector mem)));
  6517   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
  7331   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
  6518   ins_encode %{
  7332   ins_encode %{
  6519     int vector_len = 1;
  7333     int vector_len = 1;
  6520     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7334     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6521   %}
  7335   %}
  6522   ins_pipe( pipe_slow );
  7336   ins_pipe( pipe_slow );
  6523 %}
  7337 %}
  6524 
  7338 
       
  7339 instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
       
  7340   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
  7341   match(Set dst (SubVS src (LoadVector mem)));
       
  7342   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
       
  7343   ins_encode %{
       
  7344     int vector_len = 1;
       
  7345     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7346   %}
       
  7347   ins_pipe( pipe_slow );
       
  7348 %}
       
  7349 
       
  7350 instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
       
  7351   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  7352   match(Set dst (SubVS dst (LoadVector mem)));
       
  7353   effect(TEMP src);
       
  7354   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
       
  7355   ins_encode %{
       
  7356     int vector_len = 1;
       
  7357     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7358   %}
       
  7359   ins_pipe( pipe_slow );
       
  7360 %}
       
  7361 
  6525 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  7362 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  6526   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  7363   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  6527   match(Set dst (SubVS src1 src2));
  7364   match(Set dst (SubVS src1 src2));
  6528   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed32S" %}
  7365   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed32S" %}
  6529   ins_encode %{
  7366   ins_encode %{
  6530     int vector_len = 2;
  7367     int vector_len = 2;
  6531     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7368     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6532   %}
  7369   %}
  6533   ins_pipe( pipe_slow );
  7370   ins_pipe( pipe_slow );
  6534 %}
  7371 %}
  6535 
  7372 
  6536 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  7373 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  6537   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  7374   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  6538   match(Set dst (SubVS src (LoadVector mem)));
  7375   match(Set dst (SubVS src (LoadVector mem)));
  6539   format %{ "vpsubw  $dst,$src,$mem\t! sub packed32S" %}
  7376   format %{ "vpsubw  $dst,$src,$mem\t! sub packed32S" %}
  6540   ins_encode %{
  7377   ins_encode %{
  6541     int vector_len = 2;
  7378     int vector_len = 2;
  6542     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7379     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6918 
  7755 
  6919 // --------------------------------- MUL --------------------------------------
  7756 // --------------------------------- MUL --------------------------------------
  6920 
  7757 
  6921 // Shorts/Chars vector mul
  7758 // Shorts/Chars vector mul
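
For orientation, a hedged sketch of the Java loop shape whose short/char
multiplies C2's SuperWord pass turns into the MulVS nodes matched below
(method and array names are illustrative, not part of this changeset):

    static void mulShorts(short[] a, short[] b) {
        // SuperWord vectorizes this loop into MulVS nodes, which the
        // vmul*S patterns below encode as pmullw/vpmullw.
        for (int i = 0; i < a.length; i++) {
            a[i] = (short)(a[i] * b[i]);
        }
    }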
  6922 instruct vmul2S(vecS dst, vecS src) %{
  7759 instruct vmul2S(vecS dst, vecS src) %{
  6923   predicate(n->as_Vector()->length() == 2);
  7760   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  6924   match(Set dst (MulVS dst src));
  7761   match(Set dst (MulVS dst src));
  6925   format %{ "pmullw  $dst,$src\t! mul packed2S" %}
  7762   format %{ "pmullw  $dst,$src\t! mul packed2S" %}
  6926   ins_encode %{
  7763   ins_encode %{
  6927     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  7764     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  6928   %}
  7765   %}
  6929   ins_pipe( pipe_slow );
  7766   ins_pipe( pipe_slow );
  6930 %}
  7767 %}
  6931 
  7768 
  6932 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  7769 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  6933   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  7770   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  6934   match(Set dst (MulVS src1 src2));
  7771   match(Set dst (MulVS src1 src2));
  6935   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  7772   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  6936   ins_encode %{
  7773   ins_encode %{
  6937     int vector_len = 0;
  7774     int vector_len = 0;
  6938     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7775     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6939   %}
  7776   %}
  6940   ins_pipe( pipe_slow );
  7777   ins_pipe( pipe_slow );
  6941 %}
  7778 %}
  6942 
  7779 
  6943 instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
  7780 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  6944   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  7781   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
       
  7782   match(Set dst (MulVS src1 src2));
       
  7783   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
       
  7784   ins_encode %{
       
  7785     int vector_len = 0;
       
  7786     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7787   %}
       
  7788   ins_pipe( pipe_slow );
       
  7789 %}
       
  7790 
       
  7791 instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
       
  7792   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
       
  7793   match(Set dst (MulVS dst src2));
       
  7794   effect(TEMP src1);
       
  7795   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
       
  7796   ins_encode %{
       
  7797     int vector_len = 0;
       
  7798     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7799   %}
       
  7800   ins_pipe( pipe_slow );
       
  7801 %}
       
  7802 
       
  7803 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
       
  7804   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  6945   match(Set dst (MulVS src (LoadVector mem)));
  7805   match(Set dst (MulVS src (LoadVector mem)));
  6946   format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  7806   format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  6947   ins_encode %{
  7807   ins_encode %{
  6948     int vector_len = 0;
  7808     int vector_len = 0;
  6949     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7809     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6950   %}
  7810   %}
  6951   ins_pipe( pipe_slow );
  7811   ins_pipe( pipe_slow );
  6952 %}
  7812 %}
  6953 
  7813 
       
  7814 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
       
  7815   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
       
  7816   match(Set dst (MulVS src (LoadVector mem)));
       
  7817   format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
       
  7818   ins_encode %{
       
  7819     int vector_len = 0;
       
  7820     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7821   %}
       
  7822   ins_pipe( pipe_slow );
       
  7823 %}
       
  7824 
       
  7825 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
       
  7826   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
       
  7827   match(Set dst (MulVS dst (LoadVector mem)));
       
  7828   effect(TEMP src);
       
  7829   format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
       
  7830   ins_encode %{
       
  7831     int vector_len = 0;
       
  7832     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7833   %}
       
  7834   ins_pipe( pipe_slow );
       
  7835 %}
       
  7836 
  6954 instruct vmul4S(vecD dst, vecD src) %{
  7837 instruct vmul4S(vecD dst, vecD src) %{
  6955   predicate(n->as_Vector()->length() == 4);
  7838   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  6956   match(Set dst (MulVS dst src));
  7839   match(Set dst (MulVS dst src));
  6957   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
  7840   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
  6958   ins_encode %{
  7841   ins_encode %{
  6959     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  7842     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  6960   %}
  7843   %}
  6961   ins_pipe( pipe_slow );
  7844   ins_pipe( pipe_slow );
  6962 %}
  7845 %}
  6963 
  7846 
  6964 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  7847 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  6965   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  7848   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  6966   match(Set dst (MulVS src1 src2));
  7849   match(Set dst (MulVS src1 src2));
  6967   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  7850   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  6968   ins_encode %{
  7851   ins_encode %{
  6969     int vector_len = 0;
  7852     int vector_len = 0;
  6970     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7853     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  6971   %}
  7854   %}
  6972   ins_pipe( pipe_slow );
  7855   ins_pipe( pipe_slow );
  6973 %}
  7856 %}
  6974 
  7857 
  6975 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
  7858 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  6976   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  7859   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
  7860   match(Set dst (MulVS src1 src2));
       
  7861   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
       
  7862   ins_encode %{
       
  7863     int vector_len = 0;
       
  7864     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7865   %}
       
  7866   ins_pipe( pipe_slow );
       
  7867 %}
       
  7868 
       
  7869 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
       
  7870   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  7871   match(Set dst (MulVS dst src2));
       
  7872   effect(TEMP src1);
       
  7873   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
       
  7874   ins_encode %{
       
  7875     int vector_len = 0;
       
  7876     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7877   %}
       
  7878   ins_pipe( pipe_slow );
       
  7879 %}
       
  7880 
       
  7881 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
       
  7882   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  6977   match(Set dst (MulVS src (LoadVector mem)));
  7883   match(Set dst (MulVS src (LoadVector mem)));
  6978   format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  7884   format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  6979   ins_encode %{
  7885   ins_encode %{
  6980     int vector_len = 0;
  7886     int vector_len = 0;
  6981     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7887     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  6982   %}
  7888   %}
  6983   ins_pipe( pipe_slow );
  7889   ins_pipe( pipe_slow );
  6984 %}
  7890 %}
  6985 
  7891 
       
  7892 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
       
  7893   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
  7894   match(Set dst (MulVS src (LoadVector mem)));
       
  7895   format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
       
  7896   ins_encode %{
       
  7897     int vector_len = 0;
       
  7898     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7899   %}
       
  7900   ins_pipe( pipe_slow );
       
  7901 %}
       
  7902 
       
  7903 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
       
  7904   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  7905   match(Set dst (MulVS dst (LoadVector mem)));
       
  7906   effect(TEMP src);
       
  7907   format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
       
  7908   ins_encode %{
       
  7909     int vector_len = 0;
       
  7910     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7911   %}
       
  7912   ins_pipe( pipe_slow );
       
  7913 %}
       
  7914 
  6986 instruct vmul8S(vecX dst, vecX src) %{
  7915 instruct vmul8S(vecX dst, vecX src) %{
  6987   predicate(n->as_Vector()->length() == 8);
  7916   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  6988   match(Set dst (MulVS dst src));
  7917   match(Set dst (MulVS dst src));
  6989   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
  7918   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
  6990   ins_encode %{
  7919   ins_encode %{
  6991     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  7920     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  6992   %}
  7921   %}
  6993   ins_pipe( pipe_slow );
  7922   ins_pipe( pipe_slow );
  6994 %}
  7923 %}
  6995 
  7924 
  6996 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  7925 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  6997   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  7926   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  6998   match(Set dst (MulVS src1 src2));
  7927   match(Set dst (MulVS src1 src2));
  6999   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  7928   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  7000   ins_encode %{
  7929   ins_encode %{
  7001     int vector_len = 0;
  7930     int vector_len = 0;
  7002     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7931     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7003   %}
  7932   %}
  7004   ins_pipe( pipe_slow );
  7933   ins_pipe( pipe_slow );
  7005 %}
  7934 %}
  7006 
  7935 
  7007 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  7936 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  7008   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  7937   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
  7938   match(Set dst (MulVS src1 src2));
       
  7939   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
       
  7940   ins_encode %{
       
  7941     int vector_len = 0;
       
  7942     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7943   %}
       
  7944   ins_pipe( pipe_slow );
       
  7945 %}
       
  7946 
       
  7947 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
       
  7948   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  7949   match(Set dst (MulVS dst src2));
       
  7950   effect(TEMP src1);
       
  7951   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
       
  7952   ins_encode %{
       
  7953     int vector_len = 0;
       
  7954     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  7955   %}
       
  7956   ins_pipe( pipe_slow );
       
  7957 %}
       
  7958 
       
  7959 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
       
  7960   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  7009   match(Set dst (MulVS src (LoadVector mem)));
  7961   match(Set dst (MulVS src (LoadVector mem)));
  7010   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  7962   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  7011   ins_encode %{
  7963   ins_encode %{
  7012     int vector_len = 0;
  7964     int vector_len = 0;
  7013     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7965     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7014   %}
  7966   %}
  7015   ins_pipe( pipe_slow );
  7967   ins_pipe( pipe_slow );
  7016 %}
  7968 %}
  7017 
  7969 
  7018 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  7970 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
  7019   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  7971   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
  7972   match(Set dst (MulVS src (LoadVector mem)));
       
  7973   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
       
  7974   ins_encode %{
       
  7975     int vector_len = 0;
       
  7976     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7977   %}
       
  7978   ins_pipe( pipe_slow );
       
  7979 %}
       
  7980 
       
  7981 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
       
  7982   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  7983   match(Set dst (MulVS dst (LoadVector mem)));
       
  7984   effect(TEMP src);
       
  7985   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
       
  7986   ins_encode %{
       
  7987     int vector_len = 0;
       
  7988     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  7989   %}
       
  7990   ins_pipe( pipe_slow );
       
  7991 %}
       
  7992 
       
  7993 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
       
  7994   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  7020   match(Set dst (MulVS src1 src2));
  7995   match(Set dst (MulVS src1 src2));
  7021   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  7996   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  7022   ins_encode %{
  7997   ins_encode %{
  7023     int vector_len = 1;
  7998     int vector_len = 1;
  7024     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7999     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7025   %}
  8000   %}
  7026   ins_pipe( pipe_slow );
  8001   ins_pipe( pipe_slow );
  7027 %}
  8002 %}
  7028 
  8003 
  7029 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  8004 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  7030   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  8005   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
  8006   match(Set dst (MulVS src1 src2));
       
  8007   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
       
  8008   ins_encode %{
       
  8009     int vector_len = 1;
       
  8010     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  8011   %}
       
  8012   ins_pipe( pipe_slow );
       
  8013 %}
       
  8014 
       
  8015 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
       
  8016   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  8017   match(Set dst (MulVS dst src2));
       
  8018   effect(TEMP src1);
       
  8019   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
       
  8020   ins_encode %{
       
  8021     int vector_len = 1;
       
  8022     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
       
  8023   %}
       
  8024   ins_pipe( pipe_slow );
       
  8025 %}
       
  8026 
       
  8027 instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
       
  8028   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  7031   match(Set dst (MulVS src (LoadVector mem)));
  8029   match(Set dst (MulVS src (LoadVector mem)));
  7032   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  8030   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  7033   ins_encode %{
  8031   ins_encode %{
  7034     int vector_len = 1;
  8032     int vector_len = 1;
  7035     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  8033     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7036   %}
  8034   %}
  7037   ins_pipe( pipe_slow );
  8035   ins_pipe( pipe_slow );
  7038 %}
  8036 %}
  7039 
  8037 
       
  8038 instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
       
  8039   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
  8040   match(Set dst (MulVS src (LoadVector mem)));
       
  8041   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
       
  8042   ins_encode %{
       
  8043     int vector_len = 1;
       
  8044     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  8045   %}
       
  8046   ins_pipe( pipe_slow );
       
  8047 %}
       
  8048 
       
  8049 instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
       
  8050   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  8051   match(Set dst (MulVS dst (LoadVector mem)));
       
  8052   effect(TEMP src);
       
  8053   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
       
  8054   ins_encode %{
       
  8055     int vector_len = 1;
       
  8056     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
       
  8057   %}
       
  8058   ins_pipe( pipe_slow );
       
  8059 %}
       
  8060 
  7040 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  8061 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  7041   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  8062   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  7042   match(Set dst (MulVS src1 src2));
  8063   match(Set dst (MulVS src1 src2));
  7043   format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  8064   format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  7044   ins_encode %{
  8065   ins_encode %{
  7045     int vector_len = 2;
  8066     int vector_len = 2;
  7046     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  8067     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  7047   %}
  8068   %}
  7048   ins_pipe( pipe_slow );
  8069   ins_pipe( pipe_slow );
  7049 %}
  8070 %}
  7050 
  8071 
  7051 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  8072 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  7052   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  8073   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  7053   match(Set dst (MulVS src (LoadVector mem)));
  8074   match(Set dst (MulVS src (LoadVector mem)));
  7054   format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  8075   format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  7055   ins_encode %{
  8076   ins_encode %{
  7056     int vector_len = 2;
  8077     int vector_len = 2;
  7057     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  8078     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  7709 
  8730 
  7710 // ------------------------------ LeftShift -----------------------------------
  8731 // ------------------------------ LeftShift -----------------------------------
  7711 
  8732 
  7712 // Shorts/Chars vector left shift
  8733 // Shorts/Chars vector left shift
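
As with MUL above, a constant left shift over a short array is the loop
shape that yields the LShiftVS nodes these patterns match (a sketch;
names are illustrative):

    static void shiftLeft(short[] a) {
        // SuperWord turns this into LShiftVS, encoded as psllw/vpsllw.
        for (int i = 0; i < a.length; i++) {
            a[i] = (short)(a[i] << 3);
        }
    }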
  7713 instruct vsll2S(vecS dst, vecS shift) %{
  8734 instruct vsll2S(vecS dst, vecS shift) %{
  7714   predicate(n->as_Vector()->length() == 2);
  8735   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  7715   match(Set dst (LShiftVS dst shift));
  8736   match(Set dst (LShiftVS dst shift));
  7716   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  8737   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  7717   ins_encode %{
  8738   ins_encode %{
  7718     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  8739     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  7719   %}
  8740   %}
  7720   ins_pipe( pipe_slow );
  8741   ins_pipe( pipe_slow );
  7721 %}
  8742 %}
  7722 
  8743 
  7723 instruct vsll2S_imm(vecS dst, immI8 shift) %{
  8744 instruct vsll2S_imm(vecS dst, immI8 shift) %{
  7724   predicate(n->as_Vector()->length() == 2);
  8745   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  7725   match(Set dst (LShiftVS dst shift));
  8746   match(Set dst (LShiftVS dst shift));
  7726   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  8747   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  7727   ins_encode %{
  8748   ins_encode %{
  7728     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  8749     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  7729   %}
  8750   %}
  7730   ins_pipe( pipe_slow );
  8751   ins_pipe( pipe_slow );
  7731 %}
  8752 %}
  7732 
  8753 
  7733 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  8754 instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  7734   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  8755   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  7735   match(Set dst (LShiftVS src shift));
  8756   match(Set dst (LShiftVS src shift));
  7736   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  8757   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  7737   ins_encode %{
  8758   ins_encode %{
  7738     int vector_len = 0;
  8759     int vector_len = 0;
  7739     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8760     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  7740   %}
  8761   %}
  7741   ins_pipe( pipe_slow );
  8762   ins_pipe( pipe_slow );
  7742 %}
  8763 %}
  7743 
  8764 
  7744 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  8765 instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  7745   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  8766   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  7746   match(Set dst (LShiftVS src shift));
  8767   match(Set dst (LShiftVS src shift));
  7747   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  8768   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  7748   ins_encode %{
  8769   ins_encode %{
  7749     int vector_len = 0;
  8770     int vector_len = 0;
       
  8771     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  8772   %}
       
  8773   ins_pipe( pipe_slow );
       
  8774 %}
       
  8775 
       
  8776 instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
       
  8777   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
       
  8778   match(Set dst (LShiftVS dst shift));
       
  8779   effect(TEMP src);
       
  8780   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
       
  8781   ins_encode %{
       
  8782     int vector_len = 0;
       
  8783     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  8784   %}
       
  8785   ins_pipe( pipe_slow );
       
  8786 %}
       
  8787 
       
  8788 instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
       
  8789   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
       
  8790   match(Set dst (LShiftVS src shift));
       
  8791   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
       
  8792   ins_encode %{
       
  8793     int vector_len = 0;
  7750     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8794     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  7751   %}
  8795   %}
  7752   ins_pipe( pipe_slow );
  8796   ins_pipe( pipe_slow );
  7753 %}
  8797 %}
  7754 
  8798 
       
  8799 instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
       
  8800   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
       
  8801   match(Set dst (LShiftVS src shift));
       
  8802   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
       
  8803   ins_encode %{
       
  8804     int vector_len = 0;
       
  8805     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  8806   %}
       
  8807   ins_pipe( pipe_slow );
       
  8808 %}
       
  8809 
       
  8810 instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
       
  8811   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
       
  8812   match(Set dst (LShiftVS dst shift));
       
  8813   effect(TEMP src);
       
  8814   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
       
  8815   ins_encode %{
       
  8816     int vector_len = 0;
       
  8817     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  8818   %}
       
  8819   ins_pipe( pipe_slow );
       
  8820 %}
       
  8821 
  7755 instruct vsll4S(vecD dst, vecS shift) %{
  8822 instruct vsll4S(vecD dst, vecS shift) %{
  7756   predicate(n->as_Vector()->length() == 4);
  8823   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  7757   match(Set dst (LShiftVS dst shift));
  8824   match(Set dst (LShiftVS dst shift));
  7758   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  8825   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  7759   ins_encode %{
  8826   ins_encode %{
  7760     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  8827     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  7761   %}
  8828   %}
  7762   ins_pipe( pipe_slow );
  8829   ins_pipe( pipe_slow );
  7763 %}
  8830 %}
  7764 
  8831 
  7765 instruct vsll4S_imm(vecD dst, immI8 shift) %{
  8832 instruct vsll4S_imm(vecD dst, immI8 shift) %{
  7766   predicate(n->as_Vector()->length() == 4);
  8833   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  7767   match(Set dst (LShiftVS dst shift));
  8834   match(Set dst (LShiftVS dst shift));
  7768   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  8835   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  7769   ins_encode %{
  8836   ins_encode %{
  7770     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  8837     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  7771   %}
  8838   %}
  7772   ins_pipe( pipe_slow );
  8839   ins_pipe( pipe_slow );
  7773 %}
  8840 %}
  7774 
  8841 
  7775 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  8842 instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  7776   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  8843   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  7777   match(Set dst (LShiftVS src shift));
  8844   match(Set dst (LShiftVS src shift));
  7778   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  8845   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  7779   ins_encode %{
  8846   ins_encode %{
  7780     int vector_len = 0;
  8847     int vector_len = 0;
  7781     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8848     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  7782   %}
  8849   %}
  7783   ins_pipe( pipe_slow );
  8850   ins_pipe( pipe_slow );
  7784 %}
  8851 %}
  7785 
  8852 
  7786 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  8853 instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  7787   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  8854   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  7788   match(Set dst (LShiftVS src shift));
  8855   match(Set dst (LShiftVS src shift));
  7789   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  8856   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  7790   ins_encode %{
  8857   ins_encode %{
  7791     int vector_len = 0;
  8858     int vector_len = 0;
       
  8859     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  8860   %}
       
  8861   ins_pipe( pipe_slow );
       
  8862 %}
       
  8863 
       
  8864 instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
       
  8865   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  8866   match(Set dst (LShiftVS dst shift));
       
  8867   effect(TEMP src);
       
  8868   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
       
  8869   ins_encode %{
       
  8870     int vector_len = 0;
       
  8871     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  8872   %}
       
  8873   ins_pipe( pipe_slow );
       
  8874 %}
       
  8875 
       
  8876 instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
       
  8877   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
       
  8878   match(Set dst (LShiftVS src shift));
       
  8879   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
       
  8880   ins_encode %{
       
  8881     int vector_len = 0;
  7792     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8882     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  7793   %}
  8883   %}
  7794   ins_pipe( pipe_slow );
  8884   ins_pipe( pipe_slow );
  7795 %}
  8885 %}
  7796 
  8886 
       
  8887 instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
       
  8888   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
  8889   match(Set dst (LShiftVS src shift));
       
  8890   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
       
  8891   ins_encode %{
       
  8892     int vector_len = 0;
       
  8893     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  8894   %}
       
  8895   ins_pipe( pipe_slow );
       
  8896 %}
       
  8897 
       
  8898 instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
       
  8899   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  8900   match(Set dst (LShiftVS dst shift));
       
  8901   effect(TEMP src);
       
  8902   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
       
  8903   ins_encode %{
       
  8904     int vector_len = 0;
       
  8905     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  8906   %}
       
  8907   ins_pipe( pipe_slow );
       
  8908 %}
       
  8909 
  7797 instruct vsll8S(vecX dst, vecS shift) %{
  8910 instruct vsll8S(vecX dst, vecS shift) %{
  7798   predicate(n->as_Vector()->length() == 8);
  8911   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  7799   match(Set dst (LShiftVS dst shift));
  8912   match(Set dst (LShiftVS dst shift));
  7800   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  8913   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  7801   ins_encode %{
  8914   ins_encode %{
  7802     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  8915     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  7803   %}
  8916   %}
  7804   ins_pipe( pipe_slow );
  8917   ins_pipe( pipe_slow );
  7805 %}
  8918 %}
  7806 
  8919 
  7807 instruct vsll8S_imm(vecX dst, immI8 shift) %{
  8920 instruct vsll8S_imm(vecX dst, immI8 shift) %{
  7808   predicate(n->as_Vector()->length() == 8);
  8921   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  7809   match(Set dst (LShiftVS dst shift));
  8922   match(Set dst (LShiftVS dst shift));
  7810   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  8923   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  7811   ins_encode %{
  8924   ins_encode %{
  7812     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  8925     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  7813   %}
  8926   %}
  7814   ins_pipe( pipe_slow );
  8927   ins_pipe( pipe_slow );
  7815 %}
  8928 %}
  7816 
  8929 
  7817 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  8930 instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  7818   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  8931   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  7819   match(Set dst (LShiftVS src shift));
  8932   match(Set dst (LShiftVS src shift));
  7820   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  8933   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  7821   ins_encode %{
  8934   ins_encode %{
  7822     int vector_len = 0;
  8935     int vector_len = 0;
  7823     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8936     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  7824   %}
  8937   %}
  7825   ins_pipe( pipe_slow );
  8938   ins_pipe( pipe_slow );
  7826 %}
  8939 %}
  7827 
  8940 
  7828 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  8941 instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  7829   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  8942   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  7830   match(Set dst (LShiftVS src shift));
  8943   match(Set dst (LShiftVS src shift));
  7831   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  8944   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  7832   ins_encode %{
  8945   ins_encode %{
  7833     int vector_len = 0;
  8946     int vector_len = 0;
       
  8947     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  8948   %}
       
  8949   ins_pipe( pipe_slow );
       
  8950 %}
       
  8951 
       
  8952 instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
       
  8953   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  8954   match(Set dst (LShiftVS dst shift));
       
  8955   effect(TEMP src);
       
  8956   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
       
  8957   ins_encode %{
       
  8958     int vector_len = 0;
       
  8959     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  8960   %}
       
  8961   ins_pipe( pipe_slow );
       
  8962 %}
       
  8963 
       
  8964 instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
       
  8965   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
       
  8966   match(Set dst (LShiftVS src shift));
       
  8967   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
       
  8968   ins_encode %{
       
  8969     int vector_len = 0;
  7834     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8970     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  7835   %}
  8971   %}
  7836   ins_pipe( pipe_slow );
  8972   ins_pipe( pipe_slow );
  7837 %}
  8973 %}
  7838 
  8974 
  7839 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  8975 instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  7840   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  8976   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
  8977   match(Set dst (LShiftVS src shift));
       
  8978   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
       
  8979   ins_encode %{
       
  8980     int vector_len = 0;
       
  8981     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  8982   %}
       
  8983   ins_pipe( pipe_slow );
       
  8984 %}
       
  8985 
       
  8986 instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
       
  8987   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  8988   match(Set dst (LShiftVS dst shift));
       
  8989   effect(TEMP src);
       
  8990   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
       
  8991   ins_encode %{
       
  8992     int vector_len = 0;
       
  8993     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  8994   %}
       
  8995   ins_pipe( pipe_slow );
       
  8996 %}
       
  8997 
       
  8998 instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
       
  8999   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  7841   match(Set dst (LShiftVS src shift));
  9000   match(Set dst (LShiftVS src shift));
  7842   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  9001   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  7843   ins_encode %{
  9002   ins_encode %{
  7844     int vector_len = 1;
  9003     int vector_len = 1;
  7845     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  9004     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  7846   %}
  9005   %}
  7847   ins_pipe( pipe_slow );
  9006   ins_pipe( pipe_slow );
  7848 %}
  9007 %}
  7849 
  9008 
  7850 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  9009 instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  7851   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  9010   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  7852   match(Set dst (LShiftVS src shift));
  9011   match(Set dst (LShiftVS src shift));
  7853   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  9012   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  7854   ins_encode %{
  9013   ins_encode %{
  7855     int vector_len = 1;
  9014     int vector_len = 1;
       
  9015     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  9016   %}
       
  9017   ins_pipe( pipe_slow );
       
  9018 %}
       
  9019 
       
  9020 instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
       
  9021   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  9022   match(Set dst (LShiftVS dst shift));
       
  9023   effect(TEMP src);
       
  9024   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
       
  9025   ins_encode %{
       
  9026     int vector_len = 1;
       
  9027     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  9028   %}
       
  9029   ins_pipe( pipe_slow );
       
  9030 %}
       
  9031 
       
  9032 instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
       
  9033   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
       
  9034   match(Set dst (LShiftVS src shift));
       
  9035   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
       
  9036   ins_encode %{
       
  9037     int vector_len = 1;
  7856     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  9038     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  7857   %}
  9039   %}
  7858   ins_pipe( pipe_slow );
  9040   ins_pipe( pipe_slow );
  7859 %}
  9041 %}
  7860 
  9042 
       
  9043 instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
       
  9044   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
  9045   match(Set dst (LShiftVS src shift));
       
  9046   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
       
  9047   ins_encode %{
       
  9048     int vector_len = 1;
       
  9049     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  9050   %}
       
  9051   ins_pipe( pipe_slow );
       
  9052 %}
       
  9053 
       
  9054 instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
       
  9055   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  9056   match(Set dst (LShiftVS dst shift));
       
  9057   effect(TEMP src);
       
  9058   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
       
  9059   ins_encode %{
       
  9060     int vector_len = 1;
       
  9061     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  9062   %}
       
  9063   ins_pipe( pipe_slow );
       
  9064 %}
       
  9065 
  7861 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  9066 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  7862   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  9067   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  7863   match(Set dst (LShiftVS src shift));
  9068   match(Set dst (LShiftVS src shift));
  7864   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
  9069   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
  7865   ins_encode %{
  9070   ins_encode %{
  7866     int vector_len = 2;
  9071     int vector_len = 2;
  7867     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  9072     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  7868   %}
  9073   %}
  7869   ins_pipe( pipe_slow );
  9074   ins_pipe( pipe_slow );
  7870 %}
  9075 %}
  7871 
  9076 
  7872 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  9077 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  7873   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  9078   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  7874   match(Set dst (LShiftVS src shift));
  9079   match(Set dst (LShiftVS src shift));
  7875   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
  9080   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
  7876   ins_encode %{
  9081   ins_encode %{
  7877     int vector_len = 2;
  9082     int vector_len = 2;
  7878     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  9083     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8102 // for negative data because Java code converts a short value into an int with
  9307 // for negative data because Java code converts a short value into an int with
  8103 // sign extension before a shift. But char vectors are fine since chars are
  9308 // sign extension before a shift. But char vectors are fine since chars are
  8104 // unsigned values.
  9309 // unsigned values.
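
A minimal Java sketch of the mismatch the comment above describes (the
method name and values are illustrative):

    static void urshiftShortDemo() {
        short s = (short)-4;               // bit pattern 0xFFFC
        int scalar   = s >>> 2;            // Java sign-extends s to int first: 0x3FFFFFFF
        int lanewise = (s & 0xFFFF) >>> 2; // what a 16-bit psrlw lane computes: 0x3FFF
        // The low 16 bits differ (0xFFFF vs 0x3FFF), so short >>> cannot be
        // implemented with psrlw; char values are zero-extended, so for char
        // vectors both computations agree.
    }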
  8105 
  9310 
  8106 instruct vsrl2S(vecS dst, vecS shift) %{
  9311 instruct vsrl2S(vecS dst, vecS shift) %{
  8107   predicate(n->as_Vector()->length() == 2);
  9312   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  8108   match(Set dst (URShiftVS dst shift));
  9313   match(Set dst (URShiftVS dst shift));
  8109   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
  9314   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
  8110   ins_encode %{
  9315   ins_encode %{
  8111     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  9316     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  8112   %}
  9317   %}
  8113   ins_pipe( pipe_slow );
  9318   ins_pipe( pipe_slow );
  8114 %}
  9319 %}
  8115 
  9320 
  8116 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  9321 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  8117   predicate(n->as_Vector()->length() == 2);
  9322   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  8118   match(Set dst (URShiftVS dst shift));
  9323   match(Set dst (URShiftVS dst shift));
  8119   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
  9324   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
  8120   ins_encode %{
  9325   ins_encode %{
  8121     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  9326     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  8122   %}
  9327   %}
  8123   ins_pipe( pipe_slow );
  9328   ins_pipe( pipe_slow );
  8124 %}
  9329 %}
  8125 
  9330 
  8126 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  9331 instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  8127   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  9332   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  8128   match(Set dst (URShiftVS src shift));
  9333   match(Set dst (URShiftVS src shift));
  8129   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
  9334   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
  8130   ins_encode %{
  9335   ins_encode %{
  8131     int vector_len = 0;
  9336     int vector_len = 0;
  8132     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  9337     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8133   %}
  9338   %}
  8134   ins_pipe( pipe_slow );
  9339   ins_pipe( pipe_slow );
  8135 %}
  9340 %}
  8136 
  9341 
  8137 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  9342 instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  8138   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  9343   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  8139   match(Set dst (URShiftVS src shift));
  9344   match(Set dst (URShiftVS src shift));
  8140   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
  9345   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
  8141   ins_encode %{
  9346   ins_encode %{
  8142     int vector_len = 0;
  9347     int vector_len = 0;
       
  9348     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  9349   %}
       
  9350   ins_pipe( pipe_slow );
       
  9351 %}
       
  9352 
       
  9353 instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
       
  9354   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
       
  9355   match(Set dst (URShiftVS dst shift));
       
  9356   effect(TEMP src);
       
  9357   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
       
  9358   ins_encode %{
       
  9359     int vector_len = 0;
       
  9360     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  9361   %}
       
  9362   ins_pipe( pipe_slow );
       
  9363 %}
       
  9364 
       
  9365 instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
       
  9366   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
       
  9367   match(Set dst (URShiftVS src shift));
       
  9368   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
       
  9369   ins_encode %{
       
  9370     int vector_len = 0;
  8143     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  9371     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8144   %}
  9372   %}
  8145   ins_pipe( pipe_slow );
  9373   ins_pipe( pipe_slow );
  8146 %}
  9374 %}
  8147 
  9375 
       
  9376 instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
       
  9377   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
       
  9378   match(Set dst (URShiftVS src shift));
       
  9379   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
       
  9380   ins_encode %{
       
  9381     int vector_len = 0;
       
  9382     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  9383   %}
       
  9384   ins_pipe( pipe_slow );
       
  9385 %}
       
  9386 
       
  9387 instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
       
  9388   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
       
  9389   match(Set dst (URShiftVS dst shift));
       
  9390   effect(TEMP src);
       
  9391   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
       
  9392   ins_encode %{
       
  9393     int vector_len = 0;
       
  9394     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  9395   %}
       
  9396   ins_pipe( pipe_slow );
       
  9397 %}
       
  9398 
  8148 instruct vsrl4S(vecD dst, vecS shift) %{
  9399 instruct vsrl4S(vecD dst, vecS shift) %{
  8149   predicate(n->as_Vector()->length() == 4);
  9400   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  8150   match(Set dst (URShiftVS dst shift));
  9401   match(Set dst (URShiftVS dst shift));
  8151   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
  9402   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
  8152   ins_encode %{
  9403   ins_encode %{
  8153     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  9404     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  8154   %}
  9405   %}
  8155   ins_pipe( pipe_slow );
  9406   ins_pipe( pipe_slow );
  8156 %}
  9407 %}
  8157 
  9408 
  8158 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  9409 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  8159   predicate(n->as_Vector()->length() == 4);
  9410   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  8160   match(Set dst (URShiftVS dst shift));
  9411   match(Set dst (URShiftVS dst shift));
  8161   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
  9412   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
  8162   ins_encode %{
  9413   ins_encode %{
  8163     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  9414     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  8164   %}
  9415   %}
  8165   ins_pipe( pipe_slow );
  9416   ins_pipe( pipe_slow );
  8166 %}
  9417 %}
  8167 
  9418 
  8168 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  9419 instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  8169   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  9420   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  8170   match(Set dst (URShiftVS src shift));
  9421   match(Set dst (URShiftVS src shift));
  8171   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
  9422   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
  8172   ins_encode %{
  9423   ins_encode %{
  8173     int vector_len = 0;
  9424     int vector_len = 0;
  8174     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  9425     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8175   %}
  9426   %}
  8176   ins_pipe( pipe_slow );
  9427   ins_pipe( pipe_slow );
  8177 %}
  9428 %}
  8178 
  9429 
  8179 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  9430 instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  8180   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  9431   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  8181   match(Set dst (URShiftVS src shift));
  9432   match(Set dst (URShiftVS src shift));
  8182   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
  9433   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
  8183   ins_encode %{
  9434   ins_encode %{
  8184     int vector_len = 0;
  9435     int vector_len = 0;
       
  9436     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  9437   %}
       
  9438   ins_pipe( pipe_slow );
       
  9439 %}
       
  9440 
       
  9441 instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
       
  9442   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  9443   match(Set dst (URShiftVS dst shift));
       
  9444   effect(TEMP src);
       
  9445   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
       
  9446   ins_encode %{
       
  9447     int vector_len = 0;
       
  9448     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  9449   %}
       
  9450   ins_pipe( pipe_slow );
       
  9451 %}
       
  9452 
       
  9453 instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
       
  9454   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
       
  9455   match(Set dst (URShiftVS src shift));
       
  9456   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
       
  9457   ins_encode %{
       
  9458     int vector_len = 0;
  8185     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  9459     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8186   %}
  9460   %}
  8187   ins_pipe( pipe_slow );
  9461   ins_pipe( pipe_slow );
  8188 %}
  9462 %}
  8189 
  9463 
       
  9464 instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
       
  9465   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
  9466   match(Set dst (URShiftVS src shift));
       
  9467   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
       
  9468   ins_encode %{
       
  9469     int vector_len = 0;
       
  9470     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  9471   %}
       
  9472   ins_pipe( pipe_slow );
       
  9473 %}
       
  9474 
       
  9475 instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
       
  9476   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
  9477   match(Set dst (URShiftVS dst shift));
       
  9478   effect(TEMP src);
       
  9479   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
       
  9480   ins_encode %{
       
  9481     int vector_len = 0;
       
  9482     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  9483   %}
       
  9484   ins_pipe( pipe_slow );
       
  9485 %}
       
  9486 
  8190 instruct vsrl8S(vecX dst, vecS shift) %{
  9487 instruct vsrl8S(vecX dst, vecS shift) %{
  8191   predicate(n->as_Vector()->length() == 8);
  9488   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  8192   match(Set dst (URShiftVS dst shift));
  9489   match(Set dst (URShiftVS dst shift));
  8193   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
  9490   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
  8194   ins_encode %{
  9491   ins_encode %{
  8195     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  9492     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  8196   %}
  9493   %}
  8197   ins_pipe( pipe_slow );
  9494   ins_pipe( pipe_slow );
  8198 %}
  9495 %}
  8199 
  9496 
  8200 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  9497 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  8201   predicate(n->as_Vector()->length() == 8);
  9498   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  8202   match(Set dst (URShiftVS dst shift));
  9499   match(Set dst (URShiftVS dst shift));
  8203   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
  9500   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
  8204   ins_encode %{
  9501   ins_encode %{
  8205     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  9502     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  8206   %}
  9503   %}
  8207   ins_pipe( pipe_slow );
  9504   ins_pipe( pipe_slow );
  8208 %}
  9505 %}
  8209 
  9506 
  8210 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  9507 instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  8211   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  9508   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  8212   match(Set dst (URShiftVS src shift));
  9509   match(Set dst (URShiftVS src shift));
  8213   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
  9510   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
  8214   ins_encode %{
  9511   ins_encode %{
  8215     int vector_len = 0;
  9512     int vector_len = 0;
  8216     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  9513     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8217   %}
  9514   %}
  8218   ins_pipe( pipe_slow );
  9515   ins_pipe( pipe_slow );
  8219 %}
  9516 %}
  8220 
  9517 
  8221 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  9518 instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  8222   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  9519   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  8223   match(Set dst (URShiftVS src shift));
  9520   match(Set dst (URShiftVS src shift));
  8224   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
  9521   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
  8225   ins_encode %{
  9522   ins_encode %{
  8226     int vector_len = 0;
  9523     int vector_len = 0;
       
  9524     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  9525   %}
       
  9526   ins_pipe( pipe_slow );
       
  9527 %}
       
  9528 
       
  9529 instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
       
  9530   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  9531   match(Set dst (URShiftVS dst shift));
       
  9532   effect(TEMP src);
       
  9533   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
       
  9534   ins_encode %{
       
  9535     int vector_len = 0;
       
  9536     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  9537   %}
       
  9538   ins_pipe( pipe_slow );
       
  9539 %}
       
  9540 
       
  9541 instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
       
  9542   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
       
  9543   match(Set dst (URShiftVS src shift));
       
  9544   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
       
  9545   ins_encode %{
       
  9546     int vector_len = 0;
  8227     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  9547     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8228   %}
  9548   %}
  8229   ins_pipe( pipe_slow );
  9549   ins_pipe( pipe_slow );
  8230 %}
  9550 %}
  8231 
  9551 
  8232 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  9552 instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  8233   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  9553   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
  9554   match(Set dst (URShiftVS src shift));
       
  9555   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
       
  9556   ins_encode %{
       
  9557     int vector_len = 0;
       
  9558     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  9559   %}
       
  9560   ins_pipe( pipe_slow );
       
  9561 %}
       
  9562 
       
  9563 instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
       
  9564   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
  9565   match(Set dst (URShiftVS dst shift));
       
  9566   effect(TEMP src);
       
  9567   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
       
  9568   ins_encode %{
       
  9569     int vector_len = 0;
       
  9570     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  9571   %}
       
  9572   ins_pipe( pipe_slow );
       
  9573 %}
       
  9574 
       
  9575 instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
       
  9576   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  8234   match(Set dst (URShiftVS src shift));
  9577   match(Set dst (URShiftVS src shift));
  8235   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
  9578   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
  8236   ins_encode %{
  9579   ins_encode %{
  8237     int vector_len = 1;
  9580     int vector_len = 1;
  8238     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  9581     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8239   %}
  9582   %}
  8240   ins_pipe( pipe_slow );
  9583   ins_pipe( pipe_slow );
  8241 %}
  9584 %}
  8242 
  9585 
  8243 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  9586 instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  8244   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  9587   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  8245   match(Set dst (URShiftVS src shift));
  9588   match(Set dst (URShiftVS src shift));
  8246   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
  9589   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
  8247   ins_encode %{
  9590   ins_encode %{
  8248     int vector_len = 1;
  9591     int vector_len = 1;
       
  9592     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  9593   %}
       
  9594   ins_pipe( pipe_slow );
       
  9595 %}
       
  9596 
       
  9597 instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
       
  9598   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  9599   match(Set dst (URShiftVS dst shift));
       
  9600   effect(TEMP src);
       
  9601   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
       
  9602   ins_encode %{
       
  9603     int vector_len = 1;
       
  9604     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  9605   %}
       
  9606   ins_pipe( pipe_slow );
       
  9607 %}
       
  9608 
       
  9609 instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
       
  9610   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
       
  9611   match(Set dst (URShiftVS src shift));
       
  9612   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
       
  9613   ins_encode %{
       
  9614     int vector_len = 1;
  8249     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  9615     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8250   %}
  9616   %}
  8251   ins_pipe( pipe_slow );
  9617   ins_pipe( pipe_slow );
  8252 %}
  9618 %}
  8253 
  9619 
       
  9620 instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
       
  9621   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
  9622   match(Set dst (URShiftVS src shift));
       
  9623   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
       
  9624   ins_encode %{
       
  9625     int vector_len = 1;
       
  9626     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  9627   %}
       
  9628   ins_pipe( pipe_slow );
       
  9629 %}
       
  9630 
       
  9631 instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
       
  9632   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
  9633   match(Set dst (URShiftVS dst shift));
       
  9634   effect(TEMP src);
       
  9635   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
       
  9636   ins_encode %{
       
  9637     int vector_len = 1;
       
  9638     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  9639   %}
       
  9640   ins_pipe( pipe_slow );
       
  9641 %}
       
  9642 
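Across these families the operand class fixes the number of 16-bit lanes (vecS = 2, vecD = 4, vecX = 8, vecY = 16, vecZ = 32), and vector_len in each encoding selects the machine width: 0 for the 128-bit forms (2S/4S/8S), 1 for the 256-bit forms (16S), and 2 for the 512-bit forms (32S, next). A small sketch of that mapping; the function name is hypothetical:

#include <cassert>

// Hypothetical helper mirroring the pattern in the rules above:
// lane count of 16-bit shorts -> vector_len passed to vpsrlw/vpsraw.
static int vector_len_for_short_lanes(int lanes) {
  switch (lanes) {
    case 2:
    case 4:
    case 8:  return 0;   // vecS/vecD/vecX use the 128-bit encoding
    case 16: return 1;   // vecY uses the 256-bit encoding
    case 32: return 2;   // vecZ uses the 512-bit encoding
    default: assert(false); return -1;
  }
}

int main() {
  assert(vector_len_for_short_lanes(8)  == 0);
  assert(vector_len_for_short_lanes(16) == 1);
  assert(vector_len_for_short_lanes(32) == 2);
  return 0;
}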
  8254 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  9643 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  8255   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  9644   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  8256   match(Set dst (URShiftVS src shift));
  9645   match(Set dst (URShiftVS src shift));
  8257   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
  9646   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
  8258   ins_encode %{
  9647   ins_encode %{
  8259     int vector_len = 2;
  9648     int vector_len = 2;
  8260     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  9649     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8261   %}
  9650   %}
  8262   ins_pipe( pipe_slow );
  9651   ins_pipe( pipe_slow );
  8263 %}
  9652 %}
  8264 
  9653 
  8265 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  9654 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  8266   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  9655   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  8267   match(Set dst (URShiftVS src shift));
  9656   match(Set dst (URShiftVS src shift));
  8268   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
  9657   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
  8269   ins_encode %{
  9658   ins_encode %{
  8270     int vector_len = 2;
  9659     int vector_len = 2;
  8271     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  9660     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8491 
  9880 
  8492 // ------------------- ArithmeticRightShift -----------------------------------
  9881 // ------------------- ArithmeticRightShift -----------------------------------
  8493 
  9882 
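The section below differs from the logical shifts above only in fill: psrlw/vpsrlw shift zeros into the vacated bits, while psraw/vpsraw replicate the sign bit. A scalar model of one 16-bit lane; the names srl16/sra16 are illustrative:

#include <cstdint>
#include <cstdio>

// One 16-bit lane of each shift; the packed instructions apply this per lane.
static uint16_t srl16(uint16_t x, int s) { return (uint16_t)(x >> s); }  // zero-fill
static int16_t  sra16(int16_t  x, int s) { return (int16_t)(x >> s); }   // sign-fill

int main() {
  uint16_t x = 0x8000;                                       // sign bit set
  printf("srl: 0x%04x\n", srl16(x, 4));                      // 0x0800
  printf("sra: 0x%04x\n", (uint16_t)sra16((int16_t)x, 4));   // 0xf800
  return 0;
}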
  8494 // Shorts/Chars vector arithmetic right shift
  9883 // Shorts/Chars vector arithmetic right shift
  8495 instruct vsra2S(vecS dst, vecS shift) %{
  9884 instruct vsra2S(vecS dst, vecS shift) %{
  8496   predicate(n->as_Vector()->length() == 2);
  9885   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  8497   match(Set dst (RShiftVS dst shift));
  9886   match(Set dst (RShiftVS dst shift));
  8498   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
  9887   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
  8499   ins_encode %{
  9888   ins_encode %{
  8500     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  9889     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  8501   %}
  9890   %}
  8510     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  9899     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  8511   %}
  9900   %}
  8512   ins_pipe( pipe_slow );
  9901   ins_pipe( pipe_slow );
  8513 %}
  9902 %}
  8514 
  9903 
  8515 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  9904 instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  8516   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  9905   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  8517   match(Set dst (RShiftVS src shift));
  9906   match(Set dst (RShiftVS src shift));
  8518   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  9907   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  8519   ins_encode %{
  9908   ins_encode %{
  8520     int vector_len = 0;
  9909     int vector_len = 0;
  8521     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  9910     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8522   %}
  9911   %}
  8523   ins_pipe( pipe_slow );
  9912   ins_pipe( pipe_slow );
  8524 %}
  9913 %}
  8525 
  9914 
  8526 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  9915 instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  8527   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  9916   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  8528   match(Set dst (RShiftVS src shift));
  9917   match(Set dst (RShiftVS src shift));
  8529   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  9918   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  8530   ins_encode %{
  9919   ins_encode %{
  8531     int vector_len = 0;
  9920     int vector_len = 0;
       
  9921     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  9922   %}
       
  9923   ins_pipe( pipe_slow );
       
  9924 %}
       
  9925 
       
  9926 instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
       
  9927   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
       
  9928   match(Set dst (RShiftVS dst shift));
       
  9929   effect(TEMP src);
       
  9930   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
       
  9931   ins_encode %{
       
  9932     int vector_len = 0;
       
  9933     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
  9934   %}
       
  9935   ins_pipe( pipe_slow );
       
  9936 %}
       
  9937 
       
  9938 instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
       
  9939   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
       
  9940   match(Set dst (RShiftVS src shift));
       
  9941   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
       
  9942   ins_encode %{
       
  9943     int vector_len = 0;
  8532     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  9944     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8533   %}
  9945   %}
  8534   ins_pipe( pipe_slow );
  9946   ins_pipe( pipe_slow );
  8535 %}
  9947 %}
  8536 
  9948 
       
  9949 instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
       
  9950   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
       
  9951   match(Set dst (RShiftVS src shift));
       
  9952   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
       
  9953   ins_encode %{
       
  9954     int vector_len = 0;
       
  9955     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  9956   %}
       
  9957   ins_pipe( pipe_slow );
       
  9958 %}
       
  9959 
       
  9960 instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
       
  9961   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
       
  9962   match(Set dst (RShiftVS dst shift));
       
  9963   effect(TEMP src);
       
  9964   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
       
  9965   ins_encode %{
       
  9966     int vector_len = 0;
       
  9967     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
  9968   %}
       
  9969   ins_pipe( pipe_slow );
       
  9970 %}
       
  9971 
  8537 instruct vsra4S(vecD dst, vecS shift) %{
  9972 instruct vsra4S(vecD dst, vecS shift) %{
  8538   predicate(n->as_Vector()->length() == 4);
  9973   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  8539   match(Set dst (RShiftVS dst shift));
  9974   match(Set dst (RShiftVS dst shift));
  8540   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  9975   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  8541   ins_encode %{
  9976   ins_encode %{
  8542     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  9977     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  8543   %}
  9978   %}
  8544   ins_pipe( pipe_slow );
  9979   ins_pipe( pipe_slow );
  8545 %}
  9980 %}
  8546 
  9981 
  8547 instruct vsra4S_imm(vecD dst, immI8 shift) %{
  9982 instruct vsra4S_imm(vecD dst, immI8 shift) %{
  8548   predicate(n->as_Vector()->length() == 4);
  9983   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  8549   match(Set dst (RShiftVS dst shift));
  9984   match(Set dst (RShiftVS dst shift));
  8550   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  9985   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  8551   ins_encode %{
  9986   ins_encode %{
  8552     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  9987     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  8553   %}
  9988   %}
  8554   ins_pipe( pipe_slow );
  9989   ins_pipe( pipe_slow );
  8555 %}
  9990 %}
  8556 
  9991 
  8557 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  9992 instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  8558   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  9993   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  8559   match(Set dst (RShiftVS src shift));
  9994   match(Set dst (RShiftVS src shift));
  8560   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  9995   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  8561   ins_encode %{
  9996   ins_encode %{
  8562     int vector_len = 0;
  9997     int vector_len = 0;
  8563     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  9998     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8564   %}
  9999   %}
  8565   ins_pipe( pipe_slow );
 10000   ins_pipe( pipe_slow );
  8566 %}
 10001 %}
  8567 
 10002 
  8568 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
 10003 instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  8569   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
 10004   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  8570   match(Set dst (RShiftVS src shift));
 10005   match(Set dst (RShiftVS src shift));
  8571   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
 10006   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  8572   ins_encode %{
 10007   ins_encode %{
  8573     int vector_len = 0;
 10008     int vector_len = 0;
       
 10009     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
 10010   %}
       
 10011   ins_pipe( pipe_slow );
       
 10012 %}
       
 10013 
       
 10014 instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
       
 10015   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
 10016   match(Set dst (RShiftVS dst shift));
       
 10017   effect(TEMP src);
       
 10018   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
       
 10019   ins_encode %{
       
 10020     int vector_len = 0;
       
 10021     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
 10022   %}
       
 10023   ins_pipe( pipe_slow );
       
 10024 %}
       
 10025 
       
 10026 instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
       
 10027   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
       
 10028   match(Set dst (RShiftVS src shift));
       
 10029   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
       
 10030   ins_encode %{
       
 10031     int vector_len = 0;
  8574     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
 10032     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8575   %}
 10033   %}
  8576   ins_pipe( pipe_slow );
 10034   ins_pipe( pipe_slow );
  8577 %}
 10035 %}
  8578 
 10036 
       
 10037 instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
       
 10038   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
       
 10039   match(Set dst (RShiftVS src shift));
       
 10040   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
       
 10041   ins_encode %{
       
 10042     int vector_len = 0;
       
 10043     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
 10044   %}
       
 10045   ins_pipe( pipe_slow );
       
 10046 %}
       
 10047 
       
 10048 instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
       
 10049   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
       
 10050   match(Set dst (RShiftVS dst shift));
       
 10051   effect(TEMP src);
       
 10052   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
       
 10053   ins_encode %{
       
 10054     int vector_len = 0;
       
 10055     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
 10056   %}
       
 10057   ins_pipe( pipe_slow );
       
 10058 %}
       
 10059 
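The *_evex_special variants are deliberately destructive: they match dst = dst >> shift and list src only as a TEMP, tying input and output to one register. A plausible reason, stated here as an assumption rather than taken from this hunk, is that without AVX512BW these 16-bit shifts cannot use the full EVEX register file, so the tied form keeps register allocation conservative. A scalar model of the in-place shape; sraw_inplace is an illustrative name:

#include <cstdint>
#include <cstdio>

// In-place model of match(Set dst (RShiftVS dst shift)): dst is both the
// input and the output, as in the *_evex_special rules above.
static void sraw_inplace(int16_t* dst, int lanes, int shift) {
  for (int i = 0; i < lanes; i++) dst[i] = (int16_t)(dst[i] >> shift);
}

int main() {
  int16_t v[4] = { -32768, 16, -1, 1 };
  sraw_inplace(v, 4, 1);
  for (int i = 0; i < 4; i++) printf("%d ", v[i]);   // -16384 8 -1 0
  printf("\n");
  return 0;
}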
  8579 instruct vsra8S(vecX dst, vecS shift) %{
 10060 instruct vsra8S(vecX dst, vecS shift) %{
  8580   predicate(n->as_Vector()->length() == 8);
 10061   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  8581   match(Set dst (RShiftVS dst shift));
 10062   match(Set dst (RShiftVS dst shift));
  8582   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
 10063   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  8583   ins_encode %{
 10064   ins_encode %{
  8584     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
 10065     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  8585   %}
 10066   %}
  8586   ins_pipe( pipe_slow );
 10067   ins_pipe( pipe_slow );
  8587 %}
 10068 %}
  8588 
 10069 
  8589 instruct vsra8S_imm(vecX dst, immI8 shift) %{
 10070 instruct vsra8S_imm(vecX dst, immI8 shift) %{
  8590   predicate(n->as_Vector()->length() == 8);
 10071   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  8591   match(Set dst (RShiftVS dst shift));
 10072   match(Set dst (RShiftVS dst shift));
  8592   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
 10073   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  8593   ins_encode %{
 10074   ins_encode %{
  8594     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
 10075     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  8595   %}
 10076   %}
  8596   ins_pipe( pipe_slow );
 10077   ins_pipe( pipe_slow );
  8597 %}
 10078 %}
  8598 
 10079 
  8599 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
 10080 instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  8600   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
 10081   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  8601   match(Set dst (RShiftVS src shift));
 10082   match(Set dst (RShiftVS src shift));
  8602   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
 10083   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  8603   ins_encode %{
 10084   ins_encode %{
  8604     int vector_len = 0;
 10085     int vector_len = 0;
  8605     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
 10086     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8606   %}
 10087   %}
  8607   ins_pipe( pipe_slow );
 10088   ins_pipe( pipe_slow );
  8608 %}
 10089 %}
  8609 
 10090 
  8610 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
 10091 instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  8611   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
 10092   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  8612   match(Set dst (RShiftVS src shift));
 10093   match(Set dst (RShiftVS src shift));
  8613   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
 10094   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  8614   ins_encode %{
 10095   ins_encode %{
  8615     int vector_len = 0;
 10096     int vector_len = 0;
       
 10097     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
 10098   %}
       
 10099   ins_pipe( pipe_slow );
       
 10100 %}
       
 10101 
       
 10102 instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
       
 10103   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
 10104   match(Set dst (RShiftVS dst shift));
       
 10105   effect(TEMP src);
       
 10106   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
       
 10107   ins_encode %{
       
 10108     int vector_len = 0;
       
 10109     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
 10110   %}
       
 10111   ins_pipe( pipe_slow );
       
 10112 %}
       
 10113 
       
 10114 instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
       
 10115   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
       
 10116   match(Set dst (RShiftVS src shift));
       
 10117   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
       
 10118   ins_encode %{
       
 10119     int vector_len = 0;
  8616     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
 10120     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8617   %}
 10121   %}
  8618   ins_pipe( pipe_slow );
 10122   ins_pipe( pipe_slow );
  8619 %}
 10123 %}
  8620 
 10124 
  8621 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
 10125 instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  8622   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
 10126   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
       
 10127   match(Set dst (RShiftVS src shift));
       
 10128   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
       
 10129   ins_encode %{
       
 10130     int vector_len = 0;
       
 10131     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
 10132   %}
       
 10133   ins_pipe( pipe_slow );
       
 10134 %}
       
 10135 
       
 10136 instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
       
 10137   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
       
 10138   match(Set dst (RShiftVS dst shift));
       
 10139   effect(TEMP src);
       
 10140   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
       
 10141   ins_encode %{
       
 10142     int vector_len = 0;
       
 10143     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
 10144   %}
       
 10145   ins_pipe( pipe_slow );
       
 10146 %}
       
 10147 
       
 10148 instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
       
 10149   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  8623   match(Set dst (RShiftVS src shift));
 10150   match(Set dst (RShiftVS src shift));
  8624   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
 10151   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  8625   ins_encode %{
 10152   ins_encode %{
  8626     int vector_len = 1;
 10153     int vector_len = 1;
  8627     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
 10154     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8628   %}
 10155   %}
  8629   ins_pipe( pipe_slow );
 10156   ins_pipe( pipe_slow );
  8630 %}
 10157 %}
  8631 
 10158 
  8632 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
 10159 instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  8633   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
 10160   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  8634   match(Set dst (RShiftVS src shift));
 10161   match(Set dst (RShiftVS src shift));
  8635   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
 10162   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  8636   ins_encode %{
 10163   ins_encode %{
  8637     int vector_len = 1;
 10164     int vector_len = 1;
       
 10165     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
 10166   %}
       
 10167   ins_pipe( pipe_slow );
       
 10168 %}
       
 10169 
       
 10170 instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
       
 10171   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
 10172   match(Set dst (RShiftVS dst shift));
       
 10173   effect(TEMP src);
       
 10174   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
       
 10175   ins_encode %{
       
 10176     int vector_len = 1;
       
 10177     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
       
 10178   %}
       
 10179   ins_pipe( pipe_slow );
       
 10180 %}
       
 10181 
       
 10182 instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
       
  10183   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
       
 10184   match(Set dst (RShiftVS src shift));
       
 10185   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
       
 10186   ins_encode %{
       
 10187     int vector_len = 1;
  8638     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
 10188     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  8639   %}
 10189   %}
  8640   ins_pipe( pipe_slow );
 10190   ins_pipe( pipe_slow );
  8641 %}
 10191 %}
  8642 
 10192 
       
 10193 instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
       
 10194   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
       
 10195   match(Set dst (RShiftVS src shift));
       
 10196   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
       
 10197   ins_encode %{
       
 10198     int vector_len = 1;
       
 10199     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
 10200   %}
       
 10201   ins_pipe( pipe_slow );
       
 10202 %}
       
 10203 
       
 10204 instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
       
 10205   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
       
 10206   match(Set dst (RShiftVS dst shift));
       
 10207   effect(TEMP src);
       
 10208   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
       
 10209   ins_encode %{
       
 10210     int vector_len = 1;
       
 10211     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
       
 10212   %}
       
 10213   ins_pipe( pipe_slow );
       
 10214 %}
       
 10215 
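Both shift-count forms above map to the same instructions: immI8 supplies an immediate count, and vecS supplies a count read from the low quadword of an XMM register. Per the instruction-set manuals (an external reference, not this file), out-of-range counts diverge by kind: a logical word shift zeroes the lane once the count exceeds 15, while an arithmetic word shift behaves as a shift by 15, leaving only sign bits. A scalar model of that clamping; the lane helpers are illustrative:

#include <cstdint>
#include <cstdio>

// Out-of-range count behavior per 16-bit lane, as described in the ISA
// manuals: psrlw/vpsrlw zero the lane for counts > 15, psraw/vpsraw
// behave as if the count were 15.
static uint16_t psrlw_lane(uint16_t x, uint64_t cnt) {
  return cnt > 15 ? 0 : (uint16_t)(x >> cnt);
}
static int16_t psraw_lane(int16_t x, uint64_t cnt) {
  return (int16_t)(x >> (cnt > 15 ? 15 : cnt));
}

int main() {
  printf("0x%04x\n", psrlw_lane(0xffff, 16));         // 0x0000: lane cleared
  printf("0x%04x\n", (uint16_t)psraw_lane(-1, 16));   // 0xffff: sign replicated
  return 0;
}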
  8643 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
 10216 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  8644   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
 10217   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  8645   match(Set dst (RShiftVS src shift));
 10218   match(Set dst (RShiftVS src shift));
  8646   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
 10219   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  8647   ins_encode %{
 10220   ins_encode %{
  8648     int vector_len = 2;
 10221     int vector_len = 2;
  8649     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
 10222     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  8650   %}
 10223   %}
  8651   ins_pipe( pipe_slow );
 10224   ins_pipe( pipe_slow );
  8652 %}
 10225 %}
  8653 
 10226 
  8654 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
 10227 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  8655   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
 10228   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  8656   match(Set dst (RShiftVS src shift));
 10229   match(Set dst (RShiftVS src shift));
  8657   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
 10230   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  8658   ins_encode %{
 10231   ins_encode %{
  8659     int vector_len = 2;
 10232     int vector_len = 2;
  8660     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
 10233     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);