   %}
   ins_pipe( pipe_slow );
 %}
 #endif

-instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "addss   $tmp,$src2\n\t"
-            "pshufd  $tmp2,$src2,0x01\n\t"
-            "addss   $tmp,$tmp2\n\t"
-            "movdqu  $dst,$tmp\t! add reduction2F" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "addss   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0x01\n\t"
+            "addss   $dst,$tmp\t! add reduction2F" %}
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}

-instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVF src1 src2));
+  match(Set dst (AddReductionVF dst src2));
-  effect(TEMP tmp2, TEMP tmp);
+  effect(TEMP dst, TEMP tmp);
-  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+  format %{ "vaddss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vaddss  $dst,$tmp2,$tmp\t! add reduction2F" %}
+            "vaddss  $dst,$dst,$tmp\t! add reduction2F" %}
   ins_encode %{
-    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

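Both rewrites above drop the separate scalar input `src1`: the incoming sum now arrives in `dst` itself (`match(Set dst (AddReductionVF dst src2))`), so the SSE form no longer needs the `movdqu` copies or the second temp. As a standalone sketch of what the new rsadd2F sequence computes — illustrative C++ with SSE intrinsics, not HotSpot code:

#include <immintrin.h>

// dst carries the running scalar sum; src2 packs the two floats to fold in.
float add_reduction2F(float dst, __m128 src2) {
  __m128 acc = _mm_set_ss(dst);
  acc = _mm_add_ss(acc, src2);                                   // addss  $dst,$src2  (lane 0)
  __m128i hi = _mm_shuffle_epi32(_mm_castps_si128(src2), 0x01);  // pshufd $tmp,$src2,0x01
  acc = _mm_add_ss(acc, _mm_castsi128_ps(hi));                   // addss  $dst,$tmp   (lane 1)
  return _mm_cvtss_f32(acc);
}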
-instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "addss   $tmp,$src2\n\t"
-            "pshufd  $tmp2,$src2,0x01\n\t"
-            "addss   $tmp,$tmp2\n\t"
-            "pshufd  $tmp2,$src2,0x02\n\t"
-            "addss   $tmp,$tmp2\n\t"
-            "pshufd  $tmp2,$src2,0x03\n\t"
-            "addss   $tmp,$tmp2\n\t"
-            "movdqu  $dst,$tmp\t! add reduction4F" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "addss   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0x01\n\t"
+            "addss   $dst,$tmp\n\t"
+            "pshufd  $tmp,$src2,0x02\n\t"
+            "addss   $dst,$tmp\n\t"
+            "pshufd  $tmp,$src2,0x03\n\t"
+            "addss   $dst,$tmp\t! add reduction4F" %}
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}

-instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVF src1 src2));
+  match(Set dst (AddReductionVF dst src2));
-  effect(TEMP tmp, TEMP tmp2);
+  effect(TEMP tmp, TEMP dst);
-  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+  format %{ "vaddss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vaddss  $dst,$tmp2,$tmp\t! add reduction4F" %}
+            "vaddss  $dst,$dst,$tmp\t! add reduction4F" %}
   ins_encode %{
-    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

-instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVF src1 src2));
+  match(Set dst (AddReductionVF dst src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
-  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+  format %{ "vaddss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
-            "vextractf128  $tmp3,$src2\n\t"
+            "vextractf128  $tmp2,$src2\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
+            "vaddss  $dst,$dst,$tmp2\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
-            "vaddss  $dst,$tmp2,$tmp\t! add reduction8F" %}
+            "vaddss  $dst,$dst,$tmp\t! add reduction8F" %}
   ins_encode %{
-    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
+    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
-    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

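radd8F keeps the same lane-by-lane shape but needs one extra step: pshufd cannot reach the upper 128 bits of a ymm register, so vextractf128 drops lanes 4..7 into the xmm temp before the second round of shuffles. Semantically (a sketch, not HotSpot code) the whole instruct is a strict left-to-right sum into the accumulator:

#include <immintrin.h>

float add_reduction8F(float dst, __m256 src2) {
  float lane[8];
  _mm256_storeu_ps(lane, src2);   // lane[4..7] is what vextractf128 exposes
  for (int i = 0; i < 8; i++)
    dst += lane[i];               // same order as the addss/vaddss chain
  return dst;
}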
-instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
   predicate(UseAVX > 2);
-  match(Set dst (AddReductionVF src1 src2));
+  match(Set dst (AddReductionVF dst src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
-  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+  format %{ "vaddss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x1\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
+            "vaddss  $dst,$dst,$tmp2\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x2\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
+            "vaddss  $dst,$dst,$tmp2\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x3\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
+            "vaddss  $dst,$dst,$tmp2\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
-            "vaddss  $dst,$tmp2,$tmp\t! add reduction16F" %}
+            "vaddss  $dst,$dst,$tmp\t! add reduction16F" %}
   ins_encode %{
-    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
-    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

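For the 512-bit form the extract also changes: the old format strings advertised vextractf64x2 while the encoder actually emitted vextractf32x4h, and the new strings make the two agree. The imm8 (0x1..0x3) selects which 128-bit quarter of the zmm lands in the temp; quarter 0 is read in place. A sketch under the same assumptions as above:

#include <immintrin.h>

float add_reduction16F(float dst, __m512 src2) {
  __m128 q[4];
  q[0] = _mm512_castps512_ps128(src2);     // quarter 0: no extract needed
  q[1] = _mm512_extractf32x4_ps(src2, 1);  // vextractf32x4 $tmp2,$src2, 0x1
  q[2] = _mm512_extractf32x4_ps(src2, 2);
  q[3] = _mm512_extractf32x4_ps(src2, 3);
  for (int j = 0; j < 4; j++) {
    float lane[4];
    _mm_storeu_ps(lane, q[j]);
    for (int i = 0; i < 4; i++)
      dst += lane[i];
  }
  return dst;
}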
-instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
+instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (AddReductionVD src1 src2));
+  match(Set dst (AddReductionVD dst src2));
   effect(TEMP tmp, TEMP dst);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "addsd   $tmp,$src2\n\t"
-            "pshufd  $dst,$src2,0xE\n\t"
+  format %{ "addsd   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0xE\n\t"
             "addsd   $dst,$tmp\t! add reduction2D" %}
   ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
+    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

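The double variants swap the shuffle immediate from 0x01 to 0xE: pshufd indexes 32-bit dwords, and 0xE (binary 00'00'11'10) copies dwords 2 and 3 — the high double — into dwords 0 and 1. A sketch of the new rsadd2D (illustrative, not HotSpot code):

#include <immintrin.h>

double add_reduction2D(double dst, __m128d src2) {
  __m128d acc = _mm_set_sd(dst);
  acc = _mm_add_sd(acc, src2);                                  // addsd  $dst,$src2  (lane 0)
  __m128i hi = _mm_shuffle_epi32(_mm_castpd_si128(src2), 0xE);  // pshufd $tmp,$src2,0xE
  acc = _mm_add_sd(acc, _mm_castsi128_pd(hi));                  // addsd  $dst,$tmp   (lane 1)
  return _mm_cvtsd_f64(acc);
}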
-instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
+instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVD src1 src2));
+  match(Set dst (AddReductionVD dst src2));
-  effect(TEMP tmp, TEMP tmp2);
+  effect(TEMP tmp, TEMP dst);
-  format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
+  format %{ "vaddsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vaddsd  $dst,$tmp2,$tmp\t! add reduction2D" %}
+            "vaddsd  $dst,$dst,$tmp\t! add reduction2D" %}
   ins_encode %{
-    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

-instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
+instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVD src1 src2));
+  match(Set dst (AddReductionVD dst src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
-  format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
+  format %{ "vaddsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
+            "vaddsd  $dst,$dst,$tmp\n\t"
-            "vextractf128  $tmp3,$src2\n\t"
+            "vextractf32x4h  $tmp2,$src2, 0x1\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
+            "vaddsd  $dst,$dst,$tmp2\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
-            "vaddsd  $dst,$tmp2,$tmp\t! add reduction4D" %}
+            "vaddsd  $dst,$dst,$tmp\t! add reduction4D" %}
   ins_encode %{
-    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
-    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

-instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
+instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
   predicate(UseAVX > 2);
-  match(Set dst (AddReductionVD src1 src2));
+  match(Set dst (AddReductionVD dst src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
-  format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
+  format %{ "vaddsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
+            "vaddsd  $dst,$dst,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x1\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
+            "vaddsd  $dst,$dst,$tmp2\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
+            "vaddsd  $dst,$dst,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x2\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
+            "vaddsd  $dst,$dst,$tmp2\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
+            "vaddsd  $dst,$dst,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x3\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
+            "vaddsd  $dst,$dst,$tmp2\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
-            "vaddsd  $dst,$tmp2,$tmp\t! add reduction8D" %}
+            "vaddsd  $dst,$dst,$tmp\t! add reduction8D" %}
   ins_encode %{
-    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
-    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{

[... old lines 5102-5311 / new lines 5213-5422 not shown in the source ...]

   %}
   ins_pipe( pipe_slow );
 %}
 #endif

-instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "mulss   $tmp,$src2\n\t"
-            "pshufd  $tmp2,$src2,0x01\n\t"
-            "mulss   $tmp,$tmp2\n\t"
-            "movdqu  $dst,$tmp\t! mul reduction2F" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "mulss   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0x01\n\t"
+            "mulss   $dst,$tmp\t! mul reduction2F" %}
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}

-instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVF src1 src2));
+  match(Set dst (MulReductionVF dst src2));
-  effect(TEMP tmp, TEMP tmp2);
+  effect(TEMP tmp, TEMP dst);
-  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
+  format %{ "vmulss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vmulss  $dst,$tmp2,$tmp\t! mul reduction2F" %}
+            "vmulss  $dst,$dst,$tmp\t! mul reduction2F" %}
   ins_encode %{
-    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

-instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "mulss   $tmp,$src2\n\t"
-            "pshufd  $tmp2,$src2,0x01\n\t"
-            "mulss   $tmp,$tmp2\n\t"
-            "pshufd  $tmp2,$src2,0x02\n\t"
-            "mulss   $tmp,$tmp2\n\t"
-            "pshufd  $tmp2,$src2,0x03\n\t"
-            "mulss   $tmp,$tmp2\n\t"
-            "movdqu  $dst,$tmp\t! mul reduction4F" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "mulss   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0x01\n\t"
+            "mulss   $dst,$tmp\n\t"
+            "pshufd  $tmp,$src2,0x02\n\t"
+            "mulss   $dst,$tmp\n\t"
+            "pshufd  $tmp,$src2,0x03\n\t"
+            "mulss   $dst,$tmp\t! mul reduction4F" %}
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}

-instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVF src1 src2));
+  match(Set dst (MulReductionVF dst src2));
-  effect(TEMP tmp, TEMP tmp2);
+  effect(TEMP tmp, TEMP dst);
-  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
+  format %{ "vmulss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vmulss  $dst,$tmp2,$tmp\t! mul reduction4F" %}
+            "vmulss  $dst,$dst,$tmp\t! mul reduction4F" %}
   ins_encode %{
-    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

-instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVF src1 src2));
+  match(Set dst (MulReductionVF dst src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
-  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
+  format %{ "vmulss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
-            "vextractf128  $tmp3,$src2\n\t"
+            "vextractf128  $tmp2,$src2\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
+            "vmulss  $dst,$dst,$tmp2\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
-            "vmulss  $dst,$tmp2,$tmp\t! mul reduction8F" %}
+            "vmulss  $dst,$dst,$tmp\t! mul reduction8F" %}
   ins_encode %{
-    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
+    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
-    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

-instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
   predicate(UseAVX > 2);
-  match(Set dst (MulReductionVF src1 src2));
+  match(Set dst (MulReductionVF dst src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
-  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
+  format %{ "vmulss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp3,$src2, 0x1\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
+            "vmulss  $dst,$dst,$tmp2\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp3,$src2, 0x2\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
+            "vmulss  $dst,$dst,$tmp2\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp3,$src2, 0x3\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
+            "vmulss  $dst,$dst,$tmp2\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
-            "vmulss  $dst,$tmp2,$tmp\t! mul reduction16F" %}
+            "vmulss  $dst,$dst,$tmp\t! mul reduction16F" %}
   ins_encode %{
-    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
-    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

-instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
-  predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (MulReductionVD src1 src2));
-  effect(TEMP tmp, TEMP dst);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "mulsd   $tmp,$src2\n\t"
-            "pshufd  $dst,$src2,0xE\n\t"
-            "mulsd   $dst,$tmp\t! mul reduction2D" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
+instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+  predicate(UseSSE >= 1 && UseAVX == 0);
+  match(Set dst (MulReductionVD dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "mulsd   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0xE\n\t"
+            "mulsd   $dst,$tmp\t! mul reduction2D" %}
+  ins_encode %{
+    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}

-instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
-  predicate(UseAVX > 0);
-  match(Set dst (MulReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
-            "pshufd  $tmp,$src2,0xE\n\t"
-            "vmulsd  $dst,$tmp2,$tmp\t! mul reduction2D" %}
-  ins_encode %{
-    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
+instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst);
+  format %{ "vmulsd  $dst,$dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\t! mul reduction2D" %}
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}

-instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
-  predicate(UseAVX > 0);
-  match(Set dst (MulReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
-            "pshufd  $tmp,$src2,0xE\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf128  $tmp3,$src2\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vmulsd  $dst,$tmp2,$tmp\t! mul reduction4D" %}
-  ins_encode %{
-    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
+instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vmulsd  $dst,$dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\n\t"
+            "vextractf128  $tmp2,$src2\n\t"
+            "vmulsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\t! mul reduction4D" %}
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}

-instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
-  predicate(UseAVX > 2);
-  match(Set dst (MulReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
-            "pshufd  $tmp,$src2,0xE\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x1\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$src2,0xE\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x2\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x3\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vmulsd  $dst,$tmp2,$tmp\t! mul reduction8D" %}
-  ins_encode %{
-    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
+instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
+  predicate(UseAVX > 2);
+  match(Set dst (MulReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vmulsd  $dst,$dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vmulsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vmulsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vmulsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\t! mul reduction8D" %}
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}

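Every reduction instruct above follows the same rewrite: the pattern becomes `(AddReductionV* dst src2)` / `(MulReductionV* dst src2)`, so the node's scalar input is constrained to arrive in `dst` itself, and `effect(TEMP dst, ...)` lets the encoder accumulate into it directly — one temp register and the `movdqu`/extra three-operand copy saved per reduction, at the cost of tying `dst` up as both input and output. What such a node computes, in scalar form (my paraphrase, not matcher code):

// acc is the incoming scalar carried in dst; v holds the n vector lanes.
double mul_reduction_vd(double acc, const double* v, int n) {
  for (int i = 0; i < n; i++)
    acc *= v[i];   // strict lane order, matching the vmulsd chains above
  return acc;
}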
 // ====================VECTOR ARITHMETIC=======================================

 // --------------------------------- ADD --------------------------------------

 // Bytes vector add
 instruct vadd4B(vecS dst, vecS src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (AddVB dst src));
   format %{ "paddb   $dst,$src\t! add packed4B" %}
   ins_encode %{
     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}

5620 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ |
5721 instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ |
5621 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
5722 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); |
5622 match(Set dst (AddVB src1 src2)); |
5723 match(Set dst (AddVB src1 src2)); |
5623 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} |
5724 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} |
5624 ins_encode %{ |
5725 ins_encode %{ |
5625 int vector_len = 0; |
5726 int vector_len = 0; |
5626 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
5727 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
5627 %} |
5728 %} |
5628 ins_pipe( pipe_slow ); |
5729 ins_pipe( pipe_slow ); |
5629 %} |
5730 %} |
5630 |
5731 |
5631 instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ |
5732 instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ |
5632 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
5733 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); |
|
5734 match(Set dst (AddVB src1 src2)); |
|
5735 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} |
|
5736 ins_encode %{ |
|
5737 int vector_len = 0; |
|
5738 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
5739 %} |
|
5740 ins_pipe( pipe_slow ); |
|
5741 %} |
|
5742 |
|
5743 instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ |
|
5744 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); |
|
5745 match(Set dst (AddVB dst src2)); |
|
5746 effect(TEMP src1); |
|
5747 format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %} |
|
5748 ins_encode %{ |
|
5749 int vector_len = 0; |
|
5750 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
5751 %} |
|
5752 ins_pipe( pipe_slow ); |
|
5753 %} |
|
5754 |

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
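
// Note: vector_len selects the encoded operand width. The values used by the
// rules above line up with HotSpot's Assembler::AvxVectorLen constants
// (0/1/2 for 128/256/512 bits); a stand-alone sketch of the mapping:
//
//   enum AvxVectorLen { AVX_128bit = 0, AVX_256bit = 1, AVX_512bit = 2 };
//   static int vector_bits(int vector_len) { return 128 << vector_len; }
//   static int byte_lanes(int vector_len)  { return vector_bits(vector_len) / 8; }
//   // vecS/vecD/vecX rules pass 0 (XMM), the 32B vecY rules pass 1 (YMM,
//   // 32 byte lanes), and the vadd64B vecZ rules pass 2 (ZMM, 64 byte lanes).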

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
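
// Note: Java shorts and chars share these AddVS rules because a wrapping
// 16-bit add yields the same bit pattern whether a lane is reinterpreted as
// signed (short) or unsigned (char). Stand-alone C++ illustration:
//
//   #include <cstdint>
//   int main() {
//     uint16_t c = 0xFFFF;                 // char 65535 / short -1, same bits
//     int16_t  s = (int16_t)c;
//     c = (uint16_t)(c + 2);               // 1
//     s = (int16_t)(s + 2);                // 1
//     return (c == (uint16_t)s) ? 0 : 1;   // always 0: identical bit patterns
//   }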

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
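
// Note: on EVEX-without-BW hardware the *_evex_special rules match the
// destructive form (Set dst (SubVB dst src2)), i.e. dst is both an input and
// the output, so no separate non-destructive destination is allocated.
// A stand-alone C++ array model of that read-modify-write contract:
//
//   #include <cstdint>
//   static void vsub4b_destructive(uint8_t dst[4], const uint8_t src2[4]) {
//     for (int i = 0; i < 4; i++) {
//       dst[i] = (uint8_t)(dst[i] - src2[i]);  // dst -= src2, lane-wise
//     }
//   }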

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6406 // Shorts/Chars vector sub |
7059 // Shorts/Chars vector sub |
6407 instruct vsub2S(vecS dst, vecS src) %{ |
7060 instruct vsub2S(vecS dst, vecS src) %{ |
6408 predicate(n->as_Vector()->length() == 2); |
7061 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); |
6409 match(Set dst (SubVS dst src)); |
7062 match(Set dst (SubVS dst src)); |
6410 format %{ "psubw $dst,$src\t! sub packed2S" %} |
7063 format %{ "psubw $dst,$src\t! sub packed2S" %} |
6411 ins_encode %{ |
7064 ins_encode %{ |
6412 __ psubw($dst$$XMMRegister, $src$$XMMRegister); |
7065 __ psubw($dst$$XMMRegister, $src$$XMMRegister); |
6413 %} |
7066 %} |
6414 ins_pipe( pipe_slow ); |
7067 ins_pipe( pipe_slow ); |
6415 %} |
7068 %} |
6416 |
7069 |
6417 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ |
7070 instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ |
6418 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
7071 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); |
6419 match(Set dst (SubVS src1 src2)); |
7072 match(Set dst (SubVS src1 src2)); |
6420 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} |
7073 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} |
6421 ins_encode %{ |
7074 ins_encode %{ |
6422 int vector_len = 0; |
7075 int vector_len = 0; |
6423 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
7076 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
6424 %} |
7077 %} |
6425 ins_pipe( pipe_slow ); |
7078 ins_pipe( pipe_slow ); |
6426 %} |
7079 %} |
6427 |
7080 |
6428 instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ |
7081 instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ |
6429 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
7082 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); |
|
7083 match(Set dst (SubVS src1 src2)); |
|
7084 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} |
|
7085 ins_encode %{ |
|
7086 int vector_len = 0; |
|
7087 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7088 %} |
|
7089 ins_pipe( pipe_slow ); |
|
7090 %} |
|
7091 |
|
7092 instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ |
|
7093 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); |
|
7094 match(Set dst (SubVS dst src2)); |
|
7095 effect(TEMP src1); |
|
7096 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} |
|
7097 ins_encode %{ |
|
7098 int vector_len = 0; |
|
7099 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7100 %} |
|
7101 ins_pipe( pipe_slow ); |
|
7102 %} |
|
7103 |
|
7104 instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ |
|
7105 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); |
6430 match(Set dst (SubVS src (LoadVector mem))); |
7106 match(Set dst (SubVS src (LoadVector mem))); |
6431 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} |
7107 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} |
6432 ins_encode %{ |
7108 ins_encode %{ |
6433 int vector_len = 0; |
7109 int vector_len = 0; |
6434 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
7110 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
6435 %} |
7111 %} |
6436 ins_pipe( pipe_slow ); |
7112 ins_pipe( pipe_slow ); |
6437 %} |
7113 %} |
6438 |
7114 |
|
7115 instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ |
|
7116 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); |
|
7117 match(Set dst (SubVS src (LoadVector mem))); |
|
7118 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} |
|
7119 ins_encode %{ |
|
7120 int vector_len = 0; |
|
7121 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7122 %} |
|
7123 ins_pipe( pipe_slow ); |
|
7124 %} |
|
7125 |
|
7126 instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ |
|
7127 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); |
|
7128 match(Set dst (SubVS dst (LoadVector mem))); |
|
7129 effect(TEMP src); |
|
7130 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} |
|
7131 ins_encode %{ |
|
7132 int vector_len = 0; |
|
7133 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7134 %} |
|
7135 ins_pipe( pipe_slow ); |
|
7136 %} |
|
7137 |
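From here on the patch applies one fixed pattern to every packed-short operation: the old single UseAVX predicate splits into an _avx variant (by its name, AVX/AVX2 hardware without AVX-512), an _evex variant for AVX-512 parts with the BW extension, and an _evex_special variant for AVX-512 parts without BW, where the node is matched in two-operand shape (dst is both input and output) and the spare operand is declared TEMP. A minimal Java sketch of the assumed three-way dispatch; the class and parameter names are invented for illustration, and only the VM_Version predicate names come from the patch itself:

    // Hypothetical mirror of the predicate split; exactly one branch
    // holds on a given CPU, so the matcher sees one candidate per size.
    final class VariantPicker {
        static String pick(boolean avx, boolean evex, boolean avx512bw) {
            if (evex && avx512bw) return "_evex";          // supports_avx512bw()
            if (evex)             return "_evex_special";  // supports_avx512nobw()
            if (avx)              return "_avx";           // supports_avxonly()
            return "legacy SSE form";                      // UseAVX == 0
        }
    }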
6439 instruct vsub4S(vecD dst, vecD src) %{ |
7138 instruct vsub4S(vecD dst, vecD src) %{ |
6440 predicate(n->as_Vector()->length() == 4); |
7139 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); |
6441 match(Set dst (SubVS dst src)); |
7140 match(Set dst (SubVS dst src)); |
6442 format %{ "psubw $dst,$src\t! sub packed4S" %} |
7141 format %{ "psubw $dst,$src\t! sub packed4S" %} |
6443 ins_encode %{ |
7142 ins_encode %{ |
6444 __ psubw($dst$$XMMRegister, $src$$XMMRegister); |
7143 __ psubw($dst$$XMMRegister, $src$$XMMRegister); |
6445 %} |
7144 %} |
6446 ins_pipe( pipe_slow ); |
7145 ins_pipe( pipe_slow ); |
6447 %} |
7146 %} |
6448 |
7147 |
6449 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ |
7148 instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ |
6450 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
7149 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); |
6451 match(Set dst (SubVS src1 src2)); |
7150 match(Set dst (SubVS src1 src2)); |
6452 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} |
7151 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} |
6453 ins_encode %{ |
7152 ins_encode %{ |
6454 int vector_len = 0; |
7153 int vector_len = 0; |
6455 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
7154 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
6456 %} |
7155 %} |
6457 ins_pipe( pipe_slow ); |
7156 ins_pipe( pipe_slow ); |
6458 %} |
7157 %} |
6459 |
7158 |
6460 instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ |
7159 instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ |
6461 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
7160 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); |
|
7161 match(Set dst (SubVS src1 src2)); |
|
7162 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} |
|
7163 ins_encode %{ |
|
7164 int vector_len = 0; |
|
7165 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7166 %} |
|
7167 ins_pipe( pipe_slow ); |
|
7168 %} |
|
7169 |
|
7170 instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ |
|
7171 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); |
|
7172 match(Set dst (SubVS dst src2)); |
|
7173 effect(TEMP src1); |
|
7174 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} |
|
7175 ins_encode %{ |
|
7176 int vector_len = 0; |
|
7177 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7178 %} |
|
7179 ins_pipe( pipe_slow ); |
|
7180 %} |
|
7181 |
|
7182 instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{ |
|
7183 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); |
6462 match(Set dst (SubVS src (LoadVector mem))); |
7184 match(Set dst (SubVS src (LoadVector mem))); |
6463 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} |
7185 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} |
6464 ins_encode %{ |
7186 ins_encode %{ |
6465 int vector_len = 0; |
7187 int vector_len = 0; |
6466 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
7188 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
6467 %} |
7189 %} |
6468 ins_pipe( pipe_slow ); |
7190 ins_pipe( pipe_slow ); |
6469 %} |
7191 %} |
6470 |
7192 |
|
7193 instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{ |
|
7194 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); |
|
7195 match(Set dst (SubVS src (LoadVector mem))); |
|
7196 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} |
|
7197 ins_encode %{ |
|
7198 int vector_len = 0; |
|
7199 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7200 %} |
|
7201 ins_pipe( pipe_slow ); |
|
7202 %} |
|
7203 |
|
7204 instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ |
|
7205 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); |
|
7206 match(Set dst (SubVS dst (LoadVector mem))); |
|
7207 effect(TEMP src); |
|
7208 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} |
|
7209 ins_encode %{ |
|
7210 int vector_len = 0; |
|
7211 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7212 %} |
|
7213 ins_pipe( pipe_slow ); |
|
7214 %} |
|
7215 |
6471 instruct vsub8S(vecX dst, vecX src) %{ |
7216 instruct vsub8S(vecX dst, vecX src) %{ |
6472 predicate(n->as_Vector()->length() == 8); |
7217 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); |
6473 match(Set dst (SubVS dst src)); |
7218 match(Set dst (SubVS dst src)); |
6474 format %{ "psubw $dst,$src\t! sub packed8S" %} |
7219 format %{ "psubw $dst,$src\t! sub packed8S" %} |
6475 ins_encode %{ |
7220 ins_encode %{ |
6476 __ psubw($dst$$XMMRegister, $src$$XMMRegister); |
7221 __ psubw($dst$$XMMRegister, $src$$XMMRegister); |
6477 %} |
7222 %} |
6478 ins_pipe( pipe_slow ); |
7223 ins_pipe( pipe_slow ); |
6479 %} |
7224 %} |
6480 |
7225 |
6481 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ |
7226 instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ |
6482 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
7227 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); |
6483 match(Set dst (SubVS src1 src2)); |
7228 match(Set dst (SubVS src1 src2)); |
6484 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} |
7229 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} |
6485 ins_encode %{ |
7230 ins_encode %{ |
6486 int vector_len = 0; |
7231 int vector_len = 0; |
6487 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
7232 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
6488 %} |
7233 %} |
6489 ins_pipe( pipe_slow ); |
7234 ins_pipe( pipe_slow ); |
6490 %} |
7235 %} |
6491 |
7236 |
6492 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ |
7237 instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ |
6493 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
7238 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); |
|
7239 match(Set dst (SubVS src1 src2)); |
|
7240 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} |
|
7241 ins_encode %{ |
|
7242 int vector_len = 0; |
|
7243 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7244 %} |
|
7245 ins_pipe( pipe_slow ); |
|
7246 %} |
|
7247 |
|
7248 instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ |
|
7249 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); |
|
7250 match(Set dst (SubVS dst src2)); |
|
7251 effect(TEMP src1); |
|
7252 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} |
|
7253 ins_encode %{ |
|
7254 int vector_len = 0; |
|
7255 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7256 %} |
|
7257 ins_pipe( pipe_slow ); |
|
7258 %} |
|
7259 |
|
7260 instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{ |
|
7261 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); |
6494 match(Set dst (SubVS src (LoadVector mem))); |
7262 match(Set dst (SubVS src (LoadVector mem))); |
6495 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} |
7263 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} |
6496 ins_encode %{ |
7264 ins_encode %{ |
6497 int vector_len = 0; |
7265 int vector_len = 0; |
6498 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
7266 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
6499 %} |
7267 %} |
6500 ins_pipe( pipe_slow ); |
7268 ins_pipe( pipe_slow ); |
6501 %} |
7269 %} |
6502 |
7270 |
6503 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ |
7271 instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ |
6504 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
7272 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); |
|
7273 match(Set dst (SubVS src (LoadVector mem))); |
|
7274 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} |
|
7275 ins_encode %{ |
|
7276 int vector_len = 0; |
|
7277 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7278 %} |
|
7279 ins_pipe( pipe_slow ); |
|
7280 %} |
|
7281 |
|
7282 instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ |
|
7283 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); |
|
7284 match(Set dst (SubVS dst (LoadVector mem))); |
|
7285 effect(TEMP src); |
|
7286 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} |
|
7287 ins_encode %{ |
|
7288 int vector_len = 0; |
|
7289 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7290 %} |
|
7291 ins_pipe( pipe_slow ); |
|
7292 %} |
|
7293 |
|
7294 instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ |
|
7295 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); |
6505 match(Set dst (SubVS src1 src2)); |
7296 match(Set dst (SubVS src1 src2)); |
6506 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} |
7297 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} |
6507 ins_encode %{ |
7298 ins_encode %{ |
6508 int vector_len = 1; |
7299 int vector_len = 1; |
6509 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
7300 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
6510 %} |
7301 %} |
6511 ins_pipe( pipe_slow ); |
7302 ins_pipe( pipe_slow ); |
6512 %} |
7303 %} |
6513 |
7304 |
6514 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ |
7305 instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ |
6515 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
7306 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); |
|
7307 match(Set dst (SubVS src1 src2)); |
|
7308 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} |
|
7309 ins_encode %{ |
|
7310 int vector_len = 1; |
|
7311 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7312 %} |
|
7313 ins_pipe( pipe_slow ); |
|
7314 %} |
|
7315 |
|
7316 instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ |
|
7317 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); |
|
7318 match(Set dst (SubVS dst src2)); |
|
7319 effect(TEMP src1); |
|
7320 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} |
|
7321 ins_encode %{ |
|
7322 int vector_len = 1; |
|
7323 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7324 %} |
|
7325 ins_pipe( pipe_slow ); |
|
7326 %} |
|
7327 |
|
7328 instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ |
|
7329 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); |
6516 match(Set dst (SubVS src (LoadVector mem))); |
7330 match(Set dst (SubVS src (LoadVector mem))); |
6517 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} |
7331 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} |
6518 ins_encode %{ |
7332 ins_encode %{ |
6519 int vector_len = 1; |
7333 int vector_len = 1; |
6520 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
7334 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
6521 %} |
7335 %} |
6522 ins_pipe( pipe_slow ); |
7336 ins_pipe( pipe_slow ); |
6523 %} |
7337 %} |
6524 |
7338 |
|
7339 instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ |
|
7340 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); |
|
7341 match(Set dst (SubVS src (LoadVector mem))); |
|
7342 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} |
|
7343 ins_encode %{ |
|
7344 int vector_len = 1; |
|
7345 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7346 %} |
|
7347 ins_pipe( pipe_slow ); |
|
7348 %} |
|
7349 |
|
7350 instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ |
|
7351 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); |
|
7352 match(Set dst (SubVS dst (LoadVector mem))); |
|
7353 effect(TEMP src); |
|
7354 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} |
|
7355 ins_encode %{ |
|
7356 int vector_len = 1; |
|
7357 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7358 %} |
|
7359 ins_pipe( pipe_slow ); |
|
7360 %} |
|
7361 |
6525 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
7362 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
6526 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
7363 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); |
6527 match(Set dst (SubVS src1 src2)); |
7364 match(Set dst (SubVS src1 src2)); |
6528 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} |
7365 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} |
6529 ins_encode %{ |
7366 ins_encode %{ |
6530 int vector_len = 2; |
7367 int vector_len = 2; |
6531 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
7368 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
6532 %} |
7369 %} |
6533 ins_pipe( pipe_slow ); |
7370 ins_pipe( pipe_slow ); |
6534 %} |
7371 %} |
6535 |
7372 |
6536 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ |
7373 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ |
6537 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
7374 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); |
6538 match(Set dst (SubVS src (LoadVector mem))); |
7375 match(Set dst (SubVS src (LoadVector mem))); |
6539 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} |
7376 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} |
6540 ins_encode %{ |
7377 ins_encode %{ |
6541 int vector_len = 2; |
7378 int vector_len = 2; |
6542 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
7379 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
6918 |
7755 |
6919 // --------------------------------- MUL -------------------------------------- |
7756 // --------------------------------- MUL -------------------------------------- |
6920 |
7757 |
6921 // Shorts/Chars vector mul |
7758 // Shorts/Chars vector mul |
6922 instruct vmul2S(vecS dst, vecS src) %{ |
7759 instruct vmul2S(vecS dst, vecS src) %{ |
6923 predicate(n->as_Vector()->length() == 2); |
7760 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); |
6924 match(Set dst (MulVS dst src)); |
7761 match(Set dst (MulVS dst src)); |
6925 format %{ "pmullw $dst,$src\t! mul packed2S" %} |
7762 format %{ "pmullw $dst,$src\t! mul packed2S" %} |
6926 ins_encode %{ |
7763 ins_encode %{ |
6927 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); |
7764 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); |
6928 %} |
7765 %} |
6929 ins_pipe( pipe_slow ); |
7766 ins_pipe( pipe_slow ); |
6930 %} |
7767 %} |
6931 |
7768 |
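The pmullw in vmul2S above keeps only the low 16 bits of each 32-bit product, which is the same wrap-around the scalar Java loop gets from its narrowing cast, so the vectorized and scalar forms agree. A scalar sketch of what a MulVS over shorts means (illustrative, not taken from the patch):

    // Scalar meaning of MulVS on shorts: multiply as ints, then
    // truncate to 16 bits, exactly like pmullw does per lane.
    static void mulShorts(short[] a, short[] b, short[] r) {
        for (int i = 0; i < r.length; i++) {
            r[i] = (short)(a[i] * b[i]);
        }
    }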
6932 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ |
7769 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ |
6933 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
7770 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); |
6934 match(Set dst (MulVS src1 src2)); |
7771 match(Set dst (MulVS src1 src2)); |
6935 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} |
7772 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} |
6936 ins_encode %{ |
7773 ins_encode %{ |
6937 int vector_len = 0; |
7774 int vector_len = 0; |
6938 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
7775 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
6939 %} |
7776 %} |
6940 ins_pipe( pipe_slow ); |
7777 ins_pipe( pipe_slow ); |
6941 %} |
7778 %} |
6942 |
7779 |
6943 instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ |
7780 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ |
6944 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
7781 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); |
|
7782 match(Set dst (MulVS src1 src2)); |
|
7783 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} |
|
7784 ins_encode %{ |
|
7785 int vector_len = 0; |
|
7786 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7787 %} |
|
7788 ins_pipe( pipe_slow ); |
|
7789 %} |
|
7790 |
|
7791 instruct vmul2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ |
|
7792 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); |
|
7793 match(Set dst (MulVS dst src2)); |
|
7794 effect(TEMP src1); |
|
7795 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} |
|
7796 ins_encode %{ |
|
7797 int vector_len = 0; |
|
7798 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7799 %} |
|
7800 ins_pipe( pipe_slow ); |
|
7801 %} |
|
7802 |
|
7803 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ |
|
7804 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); |
6945 match(Set dst (MulVS src (LoadVector mem))); |
7805 match(Set dst (MulVS src (LoadVector mem))); |
6946 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} |
7806 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} |
6947 ins_encode %{ |
7807 ins_encode %{ |
6948 int vector_len = 0; |
7808 int vector_len = 0; |
6949 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
7809 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
6950 %} |
7810 %} |
6951 ins_pipe( pipe_slow ); |
7811 ins_pipe( pipe_slow ); |
6952 %} |
7812 %} |
6953 |
7813 |
|
7814 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ |
|
7815 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); |
|
7816 match(Set dst (MulVS src (LoadVector mem))); |
|
7817 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} |
|
7818 ins_encode %{ |
|
7819 int vector_len = 0; |
|
7820 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7821 %} |
|
7822 ins_pipe( pipe_slow ); |
|
7823 %} |
|
7824 |
|
7825 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ |
|
7826 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); |
|
7827 match(Set dst (MulVS dst (LoadVector mem))); |
|
7828 effect(TEMP src); |
|
7829 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} |
|
7830 ins_encode %{ |
|
7831 int vector_len = 0; |
|
7832 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7833 %} |
|
7834 ins_pipe( pipe_slow ); |
|
7835 %} |
|
7836 |
6954 instruct vmul4S(vecD dst, vecD src) %{ |
7837 instruct vmul4S(vecD dst, vecD src) %{ |
6955 predicate(n->as_Vector()->length() == 4); |
7838 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); |
6956 match(Set dst (MulVS dst src)); |
7839 match(Set dst (MulVS dst src)); |
6957 format %{ "pmullw $dst,$src\t! mul packed4S" %} |
7840 format %{ "pmullw $dst,$src\t! mul packed4S" %} |
6958 ins_encode %{ |
7841 ins_encode %{ |
6959 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); |
7842 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); |
6960 %} |
7843 %} |
6961 ins_pipe( pipe_slow ); |
7844 ins_pipe( pipe_slow ); |
6962 %} |
7845 %} |
6963 |
7846 |
6964 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ |
7847 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ |
6965 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
7848 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); |
6966 match(Set dst (MulVS src1 src2)); |
7849 match(Set dst (MulVS src1 src2)); |
6967 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} |
7850 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} |
6968 ins_encode %{ |
7851 ins_encode %{ |
6969 int vector_len = 0; |
7852 int vector_len = 0; |
6970 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
7853 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
6971 %} |
7854 %} |
6972 ins_pipe( pipe_slow ); |
7855 ins_pipe( pipe_slow ); |
6973 %} |
7856 %} |
6974 |
7857 |
6975 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ |
7858 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ |
6976 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
7859 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); |
|
7860 match(Set dst (MulVS src1 src2)); |
|
7861 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} |
|
7862 ins_encode %{ |
|
7863 int vector_len = 0; |
|
7864 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7865 %} |
|
7866 ins_pipe( pipe_slow ); |
|
7867 %} |
|
7868 |
|
7869 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ |
|
7870 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); |
|
7871 match(Set dst (MulVS dst src2)); |
|
7872 effect(TEMP src1); |
|
7873 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} |
|
7874 ins_encode %{ |
|
7875 int vector_len = 0; |
|
7876 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7877 %} |
|
7878 ins_pipe( pipe_slow ); |
|
7879 %} |
|
7880 |
|
7881 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ |
|
7882 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); |
6977 match(Set dst (MulVS src (LoadVector mem))); |
7883 match(Set dst (MulVS src (LoadVector mem))); |
6978 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} |
7884 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} |
6979 ins_encode %{ |
7885 ins_encode %{ |
6980 int vector_len = 0; |
7886 int vector_len = 0; |
6981 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
7887 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
6982 %} |
7888 %} |
6983 ins_pipe( pipe_slow ); |
7889 ins_pipe( pipe_slow ); |
6984 %} |
7890 %} |
6985 |
7891 |
|
7892 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ |
|
7893 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); |
|
7894 match(Set dst (MulVS src (LoadVector mem))); |
|
7895 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} |
|
7896 ins_encode %{ |
|
7897 int vector_len = 0; |
|
7898 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7899 %} |
|
7900 ins_pipe( pipe_slow ); |
|
7901 %} |
|
7902 |
|
7903 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ |
|
7904 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); |
|
7905 match(Set dst (MulVS dst (LoadVector mem))); |
|
7906 effect(TEMP src); |
|
7907 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} |
|
7908 ins_encode %{ |
|
7909 int vector_len = 0; |
|
7910 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7911 %} |
|
7912 ins_pipe( pipe_slow ); |
|
7913 %} |
|
7914 |
6986 instruct vmul8S(vecX dst, vecX src) %{ |
7915 instruct vmul8S(vecX dst, vecX src) %{ |
6987 predicate(n->as_Vector()->length() == 8); |
7916 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); |
6988 match(Set dst (MulVS dst src)); |
7917 match(Set dst (MulVS dst src)); |
6989 format %{ "pmullw $dst,$src\t! mul packed8S" %} |
7918 format %{ "pmullw $dst,$src\t! mul packed8S" %} |
6990 ins_encode %{ |
7919 ins_encode %{ |
6991 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); |
7920 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); |
6992 %} |
7921 %} |
6993 ins_pipe( pipe_slow ); |
7922 ins_pipe( pipe_slow ); |
6994 %} |
7923 %} |
6995 |
7924 |
6996 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ |
7925 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ |
6997 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
7926 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); |
6998 match(Set dst (MulVS src1 src2)); |
7927 match(Set dst (MulVS src1 src2)); |
6999 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} |
7928 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} |
7000 ins_encode %{ |
7929 ins_encode %{ |
7001 int vector_len = 0; |
7930 int vector_len = 0; |
7002 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
7931 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
7003 %} |
7932 %} |
7004 ins_pipe( pipe_slow ); |
7933 ins_pipe( pipe_slow ); |
7005 %} |
7934 %} |
7006 |
7935 |
7007 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ |
7936 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ |
7008 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
7937 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); |
|
7938 match(Set dst (MulVS src1 src2)); |
|
7939 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} |
|
7940 ins_encode %{ |
|
7941 int vector_len = 0; |
|
7942 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7943 %} |
|
7944 ins_pipe( pipe_slow ); |
|
7945 %} |
|
7946 |
|
7947 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ |
|
7948 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); |
|
7949 match(Set dst (MulVS dst src2)); |
|
7950 effect(TEMP src1); |
|
7951 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} |
|
7952 ins_encode %{ |
|
7953 int vector_len = 0; |
|
7954 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
7955 %} |
|
7956 ins_pipe( pipe_slow ); |
|
7957 %} |
|
7958 |
|
7959 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ |
|
7960 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); |
7009 match(Set dst (MulVS src (LoadVector mem))); |
7961 match(Set dst (MulVS src (LoadVector mem))); |
7010 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} |
7962 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} |
7011 ins_encode %{ |
7963 ins_encode %{ |
7012 int vector_len = 0; |
7964 int vector_len = 0; |
7013 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
7965 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
7014 %} |
7966 %} |
7015 ins_pipe( pipe_slow ); |
7967 ins_pipe( pipe_slow ); |
7016 %} |
7968 %} |
7017 |
7969 |
7018 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ |
7970 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ |
7019 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
7971 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); |
|
7972 match(Set dst (MulVS src (LoadVector mem))); |
|
7973 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} |
|
7974 ins_encode %{ |
|
7975 int vector_len = 0; |
|
7976 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7977 %} |
|
7978 ins_pipe( pipe_slow ); |
|
7979 %} |
|
7980 |
|
7981 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ |
|
7982 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); |
|
7983 match(Set dst (MulVS dst (LoadVector mem))); |
|
7984 effect(TEMP src); |
|
7985 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} |
|
7986 ins_encode %{ |
|
7987 int vector_len = 0; |
|
7988 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
7989 %} |
|
7990 ins_pipe( pipe_slow ); |
|
7991 %} |
|
7992 |
|
7993 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ |
|
7994 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); |
7020 match(Set dst (MulVS src1 src2)); |
7995 match(Set dst (MulVS src1 src2)); |
7021 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} |
7996 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} |
7022 ins_encode %{ |
7997 ins_encode %{ |
7023 int vector_len = 1; |
7998 int vector_len = 1; |
7024 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
7999 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
7025 %} |
8000 %} |
7026 ins_pipe( pipe_slow ); |
8001 ins_pipe( pipe_slow ); |
7027 %} |
8002 %} |
7028 |
8003 |
7029 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ |
8004 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ |
7030 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
8005 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); |
|
8006 match(Set dst (MulVS src1 src2)); |
|
8007 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} |
|
8008 ins_encode %{ |
|
8009 int vector_len = 1; |
|
8010 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
8011 %} |
|
8012 ins_pipe( pipe_slow ); |
|
8013 %} |
|
8014 |
|
8015 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ |
|
8016 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); |
|
8017 match(Set dst (MulVS dst src2)); |
|
8018 effect(TEMP src1); |
|
8019 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} |
|
8020 ins_encode %{ |
|
8021 int vector_len = 1; |
|
8022 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
|
8023 %} |
|
8024 ins_pipe( pipe_slow ); |
|
8025 %} |
|
8026 |
|
8027 instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ |
|
8028 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); |
7031 match(Set dst (MulVS src (LoadVector mem))); |
8029 match(Set dst (MulVS src (LoadVector mem))); |
7032 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} |
8030 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} |
7033 ins_encode %{ |
8031 ins_encode %{ |
7034 int vector_len = 1; |
8032 int vector_len = 1; |
7035 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
8033 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
7036 %} |
8034 %} |
7037 ins_pipe( pipe_slow ); |
8035 ins_pipe( pipe_slow ); |
7038 %} |
8036 %} |
7039 |
8037 |
|
8038 instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ |
|
8039 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); |
|
8040 match(Set dst (MulVS src (LoadVector mem))); |
|
8041 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} |
|
8042 ins_encode %{ |
|
8043 int vector_len = 1; |
|
8044 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
8045 %} |
|
8046 ins_pipe( pipe_slow ); |
|
8047 %} |
|
8048 |
|
8049 instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ |
|
8050 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); |
|
8051 match(Set dst (MulVS dst (LoadVector mem))); |
|
8052 effect(TEMP src); |
|
8053 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} |
|
8054 ins_encode %{ |
|
8055 int vector_len = 1; |
|
8056 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
|
8057 %} |
|
8058 ins_pipe( pipe_slow ); |
|
8059 %} |
|
8060 |
7040 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
8061 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ |
7041 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
8062 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); |
7042 match(Set dst (MulVS src1 src2)); |
8063 match(Set dst (MulVS src1 src2)); |
7043 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} |
8064 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} |
7044 ins_encode %{ |
8065 ins_encode %{ |
7045 int vector_len = 2; |
8066 int vector_len = 2; |
7046 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
8067 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); |
7047 %} |
8068 %} |
7048 ins_pipe( pipe_slow ); |
8069 ins_pipe( pipe_slow ); |
7049 %} |
8070 %} |
7050 |
8071 |
7051 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ |
8072 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ |
7052 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
8073 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); |
7053 match(Set dst (MulVS src (LoadVector mem))); |
8074 match(Set dst (MulVS src (LoadVector mem))); |
7054 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} |
8075 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} |
7055 ins_encode %{ |
8076 ins_encode %{ |
7056 int vector_len = 2; |
8077 int vector_len = 2; |
7057 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
8078 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); |
7709 |
8730 |
7710 // ------------------------------ LeftShift ----------------------------------- |
8731 // ------------------------------ LeftShift ----------------------------------- |
7711 |
8732 |
7712 // Shorts/Chars vector left shift |
8733 // Shorts/Chars vector left shift |
7713 instruct vsll2S(vecS dst, vecS shift) %{ |
8734 instruct vsll2S(vecS dst, vecS shift) %{ |
7714 predicate(n->as_Vector()->length() == 2); |
8735 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); |
7715 match(Set dst (LShiftVS dst shift)); |
8736 match(Set dst (LShiftVS dst shift)); |
7716 format %{ "psllw $dst,$shift\t! left shift packed2S" %} |
8737 format %{ "psllw $dst,$shift\t! left shift packed2S" %} |
7717 ins_encode %{ |
8738 ins_encode %{ |
7718 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); |
8739 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); |
7719 %} |
8740 %} |
7720 ins_pipe( pipe_slow ); |
8741 ins_pipe( pipe_slow ); |
7721 %} |
8742 %} |
7722 |
8743 |
7723 instruct vsll2S_imm(vecS dst, immI8 shift) %{ |
8744 instruct vsll2S_imm(vecS dst, immI8 shift) %{ |
7724 predicate(n->as_Vector()->length() == 2); |
8745 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); |
7725 match(Set dst (LShiftVS dst shift)); |
8746 match(Set dst (LShiftVS dst shift)); |
7726 format %{ "psllw $dst,$shift\t! left shift packed2S" %} |
8747 format %{ "psllw $dst,$shift\t! left shift packed2S" %} |
7727 ins_encode %{ |
8748 ins_encode %{ |
7728 __ psllw($dst$$XMMRegister, (int)$shift$$constant); |
8749 __ psllw($dst$$XMMRegister, (int)$shift$$constant); |
7729 %} |
8750 %} |
7730 ins_pipe( pipe_slow ); |
8751 ins_pipe( pipe_slow ); |
7731 %} |
8752 %} |
7732 |
8753 |
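Each shift size below comes in the two input forms just shown: a variable count passed in an XMM register (the vecS shift operand) and a constant count folded into the instruction as an immI8. Either way the per-lane result is the plain Java shift truncated back to 16 bits; a scalar sketch (illustrative only):

    // Scalar meaning of LShiftVS on shorts for the counts Java can
    // produce: shift as int, keep the low 16 bits, like psllw per lane.
    static short shiftLeft(short x, int count) {
        return (short)(x << count);
    }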
7733 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ |
8754 instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ |
7734 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
8755 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); |
7735 match(Set dst (LShiftVS src shift)); |
8756 match(Set dst (LShiftVS src shift)); |
7736 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} |
8757 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} |
7737 ins_encode %{ |
8758 ins_encode %{ |
7738 int vector_len = 0; |
8759 int vector_len = 0; |
7739 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8760 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
7740 %} |
8761 %} |
7741 ins_pipe( pipe_slow ); |
8762 ins_pipe( pipe_slow ); |
7742 %} |
8763 %} |
7743 |
8764 |
7744 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ |
8765 instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ |
7745 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
8766 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); |
7746 match(Set dst (LShiftVS src shift)); |
8767 match(Set dst (LShiftVS src shift)); |
7747 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} |
8768 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} |
7748 ins_encode %{ |
8769 ins_encode %{ |
7749 int vector_len = 0; |
8770 int vector_len = 0; |
|
8771 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
8772 %} |
|
8773 ins_pipe( pipe_slow ); |
|
8774 %} |
|
8775 |
|
8776 instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ |
|
8777 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); |
|
8778 match(Set dst (LShiftVS dst shift)); |
|
8779 effect(TEMP src); |
|
8780 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} |
|
8781 ins_encode %{ |
|
8782 int vector_len = 0; |
|
8783 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
8784 %} |
|
8785 ins_pipe( pipe_slow ); |
|
8786 %} |
|
8787 |
|
8788 instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ |
|
8789 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); |
|
8790 match(Set dst (LShiftVS src shift)); |
|
8791 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} |
|
8792 ins_encode %{ |
|
8793 int vector_len = 0; |
7750 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8794 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
7751 %} |
8795 %} |
7752 ins_pipe( pipe_slow ); |
8796 ins_pipe( pipe_slow ); |
7753 %} |
8797 %} |
7754 |
8798 |
|
8799 instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ |
|
8800 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); |
|
8801 match(Set dst (LShiftVS src shift)); |
|
8802 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} |
|
8803 ins_encode %{ |
|
8804 int vector_len = 0; |
|
8805 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
8806 %} |
|
8807 ins_pipe( pipe_slow ); |
|
8808 %} |
|
8809 |
|
8810 instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ |
|
8811 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); |
|
8812 match(Set dst (LShiftVS dst shift)); |
|
8813 effect(TEMP src); |
|
8814 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} |
|
8815 ins_encode %{ |
|
8816 int vector_len = 0; |
|
8817 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
8818 %} |
|
8819 ins_pipe( pipe_slow ); |
|
8820 %} |
|
8821 |
7755 instruct vsll4S(vecD dst, vecS shift) %{ |
8822 instruct vsll4S(vecD dst, vecS shift) %{ |
7756 predicate(n->as_Vector()->length() == 4); |
8823 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); |
7757 match(Set dst (LShiftVS dst shift)); |
8824 match(Set dst (LShiftVS dst shift)); |
7758 format %{ "psllw $dst,$shift\t! left shift packed4S" %} |
8825 format %{ "psllw $dst,$shift\t! left shift packed4S" %} |
7759 ins_encode %{ |
8826 ins_encode %{ |
7760 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); |
8827 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); |
7761 %} |
8828 %} |
7762 ins_pipe( pipe_slow ); |
8829 ins_pipe( pipe_slow ); |
7763 %} |
8830 %} |
7764 |
8831 |
7765 instruct vsll4S_imm(vecD dst, immI8 shift) %{ |
8832 instruct vsll4S_imm(vecD dst, immI8 shift) %{ |
7766 predicate(n->as_Vector()->length() == 4); |
8833 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); |
7767 match(Set dst (LShiftVS dst shift)); |
8834 match(Set dst (LShiftVS dst shift)); |
7768 format %{ "psllw $dst,$shift\t! left shift packed4S" %} |
8835 format %{ "psllw $dst,$shift\t! left shift packed4S" %} |
7769 ins_encode %{ |
8836 ins_encode %{ |
7770 __ psllw($dst$$XMMRegister, (int)$shift$$constant); |
8837 __ psllw($dst$$XMMRegister, (int)$shift$$constant); |
7771 %} |
8838 %} |
7772 ins_pipe( pipe_slow ); |
8839 ins_pipe( pipe_slow ); |
7773 %} |
8840 %} |
7774 |
8841 |
7775 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ |
8842 instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ |
7776 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
8843 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); |
7777 match(Set dst (LShiftVS src shift)); |
8844 match(Set dst (LShiftVS src shift)); |
7778 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} |
8845 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} |
7779 ins_encode %{ |
8846 ins_encode %{ |
7780 int vector_len = 0; |
8847 int vector_len = 0; |
7781 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8848 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
7782 %} |
8849 %} |
7783 ins_pipe( pipe_slow ); |
8850 ins_pipe( pipe_slow ); |
7784 %} |
8851 %} |
7785 |
8852 |
7786 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ |
8853 instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ |
7787 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
8854 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); |
7788 match(Set dst (LShiftVS src shift)); |
8855 match(Set dst (LShiftVS src shift)); |
7789 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} |
8856 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} |
7790 ins_encode %{ |
8857 ins_encode %{ |
7791 int vector_len = 0; |
8858 int vector_len = 0; |
|
8859 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
8860 %} |
|
8861 ins_pipe( pipe_slow ); |
|
8862 %} |
|
8863 |
|
8864 instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ |
|
8865 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); |
|
8866 match(Set dst (LShiftVS dst shift)); |
|
8867 effect(TEMP src); |
|
8868 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} |
|
8869 ins_encode %{ |
|
8870 int vector_len = 0; |
|
8871 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
8872 %} |
|
8873 ins_pipe( pipe_slow ); |
|
8874 %} |
|
8875 |
|
8876 instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ |
|
8877 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); |
|
8878 match(Set dst (LShiftVS src shift)); |
|
8879 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} |
|
8880 ins_encode %{ |
|
8881 int vector_len = 0; |
7792 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8882 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
7793 %} |
8883 %} |
7794 ins_pipe( pipe_slow ); |
8884 ins_pipe( pipe_slow ); |
7795 %} |
8885 %} |
7796 |
8886 |
|
8887 instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ |
|
8888 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); |
|
8889 match(Set dst (LShiftVS src shift)); |
|
8890 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} |
|
8891 ins_encode %{ |
|
8892 int vector_len = 0; |
|
8893 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
8894 %} |
|
8895 ins_pipe( pipe_slow ); |
|
8896 %} |
|
8897 |
|
8898 instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ |
|
8899 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); |
|
8900 match(Set dst (LShiftVS dst shift)); |
|
8901 effect(TEMP src); |
|
8902 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} |
|
8903 ins_encode %{ |
|
8904 int vector_len = 0; |
|
8905 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
8906 %} |
|
8907 ins_pipe( pipe_slow ); |
|
8908 %} |
|
8909 |
7797 instruct vsll8S(vecX dst, vecS shift) %{ |
8910 instruct vsll8S(vecX dst, vecS shift) %{ |
7798 predicate(n->as_Vector()->length() == 8); |
8911 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); |
7799 match(Set dst (LShiftVS dst shift)); |
8912 match(Set dst (LShiftVS dst shift)); |
7800 format %{ "psllw $dst,$shift\t! left shift packed8S" %} |
8913 format %{ "psllw $dst,$shift\t! left shift packed8S" %} |
7801 ins_encode %{ |
8914 ins_encode %{ |
7802 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); |
8915 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); |
7803 %} |
8916 %} |
7804 ins_pipe( pipe_slow ); |
8917 ins_pipe( pipe_slow ); |
7805 %} |
8918 %} |
7806 |
8919 |
7807 instruct vsll8S_imm(vecX dst, immI8 shift) %{ |
8920 instruct vsll8S_imm(vecX dst, immI8 shift) %{ |
7808 predicate(n->as_Vector()->length() == 8); |
8921 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); |
7809 match(Set dst (LShiftVS dst shift)); |
8922 match(Set dst (LShiftVS dst shift)); |
7810 format %{ "psllw $dst,$shift\t! left shift packed8S" %} |
8923 format %{ "psllw $dst,$shift\t! left shift packed8S" %} |
7811 ins_encode %{ |
8924 ins_encode %{ |
7812 __ psllw($dst$$XMMRegister, (int)$shift$$constant); |
8925 __ psllw($dst$$XMMRegister, (int)$shift$$constant); |
7813 %} |
8926 %} |
7814 ins_pipe( pipe_slow ); |
8927 ins_pipe( pipe_slow ); |
7815 %} |
8928 %} |
7816 |
8929 |
7817 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ |
8930 instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ |
7818 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
8931 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); |
7819 match(Set dst (LShiftVS src shift)); |
8932 match(Set dst (LShiftVS src shift)); |
7820 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} |
8933 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} |
7821 ins_encode %{ |
8934 ins_encode %{ |
7822 int vector_len = 0; |
8935 int vector_len = 0; |
7823 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8936 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
7824 %} |
8937 %} |
7825 ins_pipe( pipe_slow ); |
8938 ins_pipe( pipe_slow ); |
7826 %} |
8939 %} |
7827 |
8940 |
7828 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ |
8941 instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ |
7829 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
8942 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); |
7830 match(Set dst (LShiftVS src shift)); |
8943 match(Set dst (LShiftVS src shift)); |
7831 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} |
8944 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} |
7832 ins_encode %{ |
8945 ins_encode %{ |
7833 int vector_len = 0; |
8946 int vector_len = 0; |
|
8947 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
8948 %} |
|
8949 ins_pipe( pipe_slow ); |
|
8950 %} |
|
8951 |
|
8952 instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ |
|
8953 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); |
|
8954 match(Set dst (LShiftVS dst shift)); |
|
8955 effect(TEMP src); |
|
8956 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} |
|
8957 ins_encode %{ |
|
8958 int vector_len = 0; |
|
8959 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
8960 %} |
|
8961 ins_pipe( pipe_slow ); |
|
8962 %} |
|
8963 |
|
8964 instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ |
|
8965 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); |
|
8966 match(Set dst (LShiftVS src shift)); |
|
8967 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} |
|
8968 ins_encode %{ |
|
8969 int vector_len = 0; |
7834 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8970 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
7835 %} |
8971 %} |
7836 ins_pipe( pipe_slow ); |
8972 ins_pipe( pipe_slow ); |
7837 %} |
8973 %} |
7838 |
8974 |
7839 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ |
8975 instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ |
7840 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
8976 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); |
|
8977 match(Set dst (LShiftVS src shift)); |
|
8978 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} |
|
8979 ins_encode %{ |
|
8980 int vector_len = 0; |
|
8981 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
8982 %} |
|
8983 ins_pipe( pipe_slow ); |
|
8984 %} |
|
8985 |
|
8986 instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ |
|
8987 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); |
|
8988 match(Set dst (LShiftVS dst shift)); |
|
8989 effect(TEMP src); |
|
8990 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} |
|
8991 ins_encode %{ |
|
8992 int vector_len = 0; |
|
8993 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
8994 %} |
|
8995 ins_pipe( pipe_slow ); |
|
8996 %} |
|
8997 |
|
8998 instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ |
|
8999 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); |
7841 match(Set dst (LShiftVS src shift)); |
9000 match(Set dst (LShiftVS src shift)); |
7842 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} |
9001 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} |
7843 ins_encode %{ |
9002 ins_encode %{ |
7844 int vector_len = 1; |
9003 int vector_len = 1; |
7845 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
9004 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
7846 %} |
9005 %} |
7847 ins_pipe( pipe_slow ); |
9006 ins_pipe( pipe_slow ); |
7848 %} |
9007 %} |
7849 |
9008 |
7850 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ |
9009 instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ |
7851 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
9010 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); |
7852 match(Set dst (LShiftVS src shift)); |
9011 match(Set dst (LShiftVS src shift)); |
7853 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} |
9012 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} |
7854 ins_encode %{ |
9013 ins_encode %{ |
7855 int vector_len = 1; |
9014 int vector_len = 1; |
|
9015 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
9016 %} |
|
9017 ins_pipe( pipe_slow ); |
|
9018 %} |
|
9019 |
|
9020 instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ |
|
9021 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); |
|
9022 match(Set dst (LShiftVS dst shift)); |
|
9023 effect(TEMP src); |
|
9024 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} |
|
9025 ins_encode %{ |
|
9026 int vector_len = 1; |
|
9027 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
9028 %} |
|
9029 ins_pipe( pipe_slow ); |
|
9030 %} |
|
9031 |
|
9032 instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ |
|
9033 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); |
|
9034 match(Set dst (LShiftVS src shift)); |
|
9035 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} |
|
9036 ins_encode %{ |
|
9037 int vector_len = 1; |
7856 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
9038 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
7857 %} |
9039 %} |
7858 ins_pipe( pipe_slow ); |
9040 ins_pipe( pipe_slow ); |
7859 %} |
9041 %} |
7860 |
9042 |
|
9043 instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ |
|
9044 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); |
|
9045 match(Set dst (LShiftVS src shift)); |
|
9046 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} |
|
9047 ins_encode %{ |
|
9048 int vector_len = 1; |
|
9049 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
9050 %} |
|
9051 ins_pipe( pipe_slow ); |
|
9052 %} |
|
9053 |
|
9054 instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ |
|
9055 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); |
|
9056 match(Set dst (LShiftVS dst shift)); |
|
9057 effect(TEMP src); |
|
9058 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} |
|
9059 ins_encode %{ |
|
9060 int vector_len = 1; |
|
9061 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
9062 %} |
|
9063 ins_pipe( pipe_slow ); |
|
9064 %} |
|
9065 |
7861 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ |
9066 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ |
7862 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
9067 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); |
7863 match(Set dst (LShiftVS src shift)); |
9068 match(Set dst (LShiftVS src shift)); |
7864 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} |
9069 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} |
7865 ins_encode %{ |
9070 ins_encode %{ |
7866 int vector_len = 2; |
9071 int vector_len = 2; |
7867 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
9072 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
7868 %} |
9073 %} |
7869 ins_pipe( pipe_slow ); |
9074 ins_pipe( pipe_slow ); |
7870 %} |
9075 %} |
7871 |
9076 |
7872 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
9077 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
7873 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
9078 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); |
7874 match(Set dst (LShiftVS src shift)); |
9079 match(Set dst (LShiftVS src shift)); |
7875 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} |
9080 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} |
7876 ins_encode %{ |
9081 ins_encode %{ |
7877 int vector_len = 2; |
9082 int vector_len = 2; |
7878 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
9083 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8102 // for negative data because Java code converts short values into ints with |
9307 // for negative data because Java code converts short values into ints with |
8103 // sign extension before a shift. But char vectors are fine since chars are |
9308 // sign extension before a shift. But char vectors are fine since chars are |
8104 // unsigned values. |
9309 // unsigned values. |
8105 |
9310 |
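Concretely: scalar Java promotes a short with sign extension before >>>, so ones can be shifted into the top of the 16-bit result, while a packed psrlw always shifts zeros into each lane; for chars the promotion is a zero extension and the two agree. A small sketch of the mismatch (values chosen for illustration):

    // Why URShiftVS is unsafe for shorts but fine for chars.
    static void demo() {
        short s = (short)0x8000;   // negative short
        int viaInt = s >>> 1;      // promotes to 0xFFFF8000, shifts to 0x7FFFC000;
                                   // low 16 bits are 0xC000
        // psrlw on the 16-bit lane 0x8000 would give 0x4000 instead.
        char c = 0x8000;           // same bit pattern, zero-extended
        int viaChar = c >>> 1;     // 0x00008000 >>> 1 == 0x4000, matches psrlw
    }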
8106 instruct vsrl2S(vecS dst, vecS shift) %{ |
9311 instruct vsrl2S(vecS dst, vecS shift) %{ |
8107 predicate(n->as_Vector()->length() == 2); |
9312 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); |
8108 match(Set dst (URShiftVS dst shift)); |
9313 match(Set dst (URShiftVS dst shift)); |
8109 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} |
9314 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} |
8110 ins_encode %{ |
9315 ins_encode %{ |
8111 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); |
9316 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); |
8112 %} |
9317 %} |
8113 ins_pipe( pipe_slow ); |
9318 ins_pipe( pipe_slow ); |
8114 %} |
9319 %} |
8115 |
9320 |
8116 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ |
9321 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ |
8117 predicate(n->as_Vector()->length() == 2); |
9322 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); |
8118 match(Set dst (URShiftVS dst shift)); |
9323 match(Set dst (URShiftVS dst shift)); |
8119 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} |
9324 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} |
8120 ins_encode %{ |
9325 ins_encode %{ |
8121 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); |
9326 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); |
8122 %} |
9327 %} |
8123 ins_pipe( pipe_slow ); |
9328 ins_pipe( pipe_slow ); |
8124 %} |
9329 %} |
8125 |
9330 |
8126 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ |
9331 instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{ |
8127 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
9332 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); |
8128 match(Set dst (URShiftVS src shift)); |
9333 match(Set dst (URShiftVS src shift)); |
8129 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} |
9334 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} |
8130 ins_encode %{ |
9335 ins_encode %{ |
8131 int vector_len = 0; |
9336 int vector_len = 0; |
8132 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
9337 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8133 %} |
9338 %} |
8134 ins_pipe( pipe_slow ); |
9339 ins_pipe( pipe_slow ); |
8135 %} |
9340 %} |
8136 |
9341 |
8137 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ |
9342 instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{ |
8138 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
9343 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); |
8139 match(Set dst (URShiftVS src shift)); |
9344 match(Set dst (URShiftVS src shift)); |
8140 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} |
9345 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} |
8141 ins_encode %{ |
9346 ins_encode %{ |
8142 int vector_len = 0; |
9347 int vector_len = 0; |
|
9348 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
9349 %} |
|
9350 ins_pipe( pipe_slow ); |
|
9351 %} |
|
9352 |
|
9353 instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ |
|
9354 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); |
|
9355 match(Set dst (URShiftVS dst shift)); |
|
9356 effect(TEMP src); |
|
9357 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} |
|
9358 ins_encode %{ |
|
9359 int vector_len = 0; |
|
9360 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
9361 %} |
|
9362 ins_pipe( pipe_slow ); |
|
9363 %} |
|
9364 |
|
9365 instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ |
|
9366 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); |
|
9367 match(Set dst (URShiftVS src shift)); |
|
9368 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} |
|
9369 ins_encode %{ |
|
9370 int vector_len = 0; |
8143 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
9371 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8144 %} |
9372 %} |
8145 ins_pipe( pipe_slow ); |
9373 ins_pipe( pipe_slow ); |
8146 %} |
9374 %} |
8147 |
9375 |
|
9376 instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ |
|
9377 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); |
|
9378 match(Set dst (URShiftVS src shift)); |
|
9379 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} |
|
9380 ins_encode %{ |
|
9381 int vector_len = 0; |
|
9382 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
9383 %} |
|
9384 ins_pipe( pipe_slow ); |
|
9385 %} |
|
9386 |
|
9387 instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ |
|
9388 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); |
|
9389 match(Set dst (URShiftVS dst shift)); |
|
9390 effect(TEMP src); |
|
9391 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} |
|
9392 ins_encode %{ |
|
9393 int vector_len = 0; |
|
9394 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
9395 %} |
|
9396 ins_pipe( pipe_slow ); |
|
9397 %} |
|
9398 |
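The vsrl2S cluster above establishes the pattern this change applies to every packed-short shift that follows: the old single UseAVX > 0 rule is split into an _avx rule, an _evex rule, and an _evex_special fallback, selected by VM_Version predicates, with the _evex_special forms matching dst as both input and output and taking src as a temporary. As a hedged sketch only (the authoritative definitions live in vm_version_x86.hpp; the exact feature tests below are an assumption based on the names and on how the patch uses them):

// Illustrative declarations; consult vm_version_x86.hpp for the real tests.
static bool supports_avxonly();    // AVX or AVX2 available, no EVEX (UseAVX is 1 or 2)
static bool supports_avx256only(); // AVX2 available, no EVEX; 256-bit integer shifts need AVX2
static bool supports_avx512bw();   // AVX-512BW present: EVEX encodings cover 8/16-bit elements
static bool supports_avx512nobw(); // EVEX present but no BW, so word ops take the _evex_special rules

Throughout these rules, vector_len selects the encoding width: 0 for 128-bit, 1 for 256-bit, and 2 for 512-bit.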
8148 instruct vsrl4S(vecD dst, vecS shift) %{ |
9399 instruct vsrl4S(vecD dst, vecS shift) %{ |
8149 predicate(n->as_Vector()->length() == 4); |
9400 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); |
8150 match(Set dst (URShiftVS dst shift)); |
9401 match(Set dst (URShiftVS dst shift)); |
8151 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} |
9402 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} |
8152 ins_encode %{ |
9403 ins_encode %{ |
8153 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); |
9404 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); |
8154 %} |
9405 %} |
8155 ins_pipe( pipe_slow ); |
9406 ins_pipe( pipe_slow ); |
8156 %} |
9407 %} |
8157 |
9408 |
8158 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ |
9409 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ |
8159 predicate(n->as_Vector()->length() == 4); |
9410 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); |
8160 match(Set dst (URShiftVS dst shift)); |
9411 match(Set dst (URShiftVS dst shift)); |
8161 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} |
9412 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} |
8162 ins_encode %{ |
9413 ins_encode %{ |
8163 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); |
9414 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); |
8164 %} |
9415 %} |
8165 ins_pipe( pipe_slow ); |
9416 ins_pipe( pipe_slow ); |
8166 %} |
9417 %} |
8167 |
9418 |
8168 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ |
9419 instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{ |
8169 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
9420 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); |
8170 match(Set dst (URShiftVS src shift)); |
9421 match(Set dst (URShiftVS src shift)); |
8171 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} |
9422 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} |
8172 ins_encode %{ |
9423 ins_encode %{ |
8173 int vector_len = 0; |
9424 int vector_len = 0; |
8174 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
9425 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8175 %} |
9426 %} |
8176 ins_pipe( pipe_slow ); |
9427 ins_pipe( pipe_slow ); |
8177 %} |
9428 %} |
8178 |
9429 |
8179 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ |
9430 instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{ |
8180 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
9431 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); |
8181 match(Set dst (URShiftVS src shift)); |
9432 match(Set dst (URShiftVS src shift)); |
8182 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} |
9433 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} |
8183 ins_encode %{ |
9434 ins_encode %{ |
8184 int vector_len = 0; |
9435 int vector_len = 0; |
|
9436 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
9437 %} |
|
9438 ins_pipe( pipe_slow ); |
|
9439 %} |
|
9440 |
|
9441 instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ |
|
9442 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); |
|
9443 match(Set dst (URShiftVS dst shift)); |
|
9444 effect(TEMP src); |
|
9445 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} |
|
9446 ins_encode %{ |
|
9447 int vector_len = 0; |
|
9448 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
9449 %} |
|
9450 ins_pipe( pipe_slow ); |
|
9451 %} |
|
9452 |
|
9453 instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ |
|
9454 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); |
|
9455 match(Set dst (URShiftVS src shift)); |
|
9456 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} |
|
9457 ins_encode %{ |
|
9458 int vector_len = 0; |
8185 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
9459 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8186 %} |
9460 %} |
8187 ins_pipe( pipe_slow ); |
9461 ins_pipe( pipe_slow ); |
8188 %} |
9462 %} |
8189 |
9463 |
|
9464 instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ |
|
9465 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); |
|
9466 match(Set dst (URShiftVS src shift)); |
|
9467 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} |
|
9468 ins_encode %{ |
|
9469 int vector_len = 0; |
|
9470 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
9471 %} |
|
9472 ins_pipe( pipe_slow ); |
|
9473 %} |
|
9474 |
|
9475 instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ |
|
9476 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); |
|
9477 match(Set dst (URShiftVS dst shift)); |
|
9478 effect(TEMP src); |
|
9479 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} |
|
9480 ins_encode %{ |
|
9481 int vector_len = 0; |
|
9482 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
9483 %} |
|
9484 ins_pipe( pipe_slow ); |
|
9485 %} |
|
9486 |
8190 instruct vsrl8S(vecX dst, vecS shift) %{ |
9487 instruct vsrl8S(vecX dst, vecS shift) %{ |
8191 predicate(n->as_Vector()->length() == 8); |
9488 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); |
8192 match(Set dst (URShiftVS dst shift)); |
9489 match(Set dst (URShiftVS dst shift)); |
8193 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} |
9490 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} |
8194 ins_encode %{ |
9491 ins_encode %{ |
8195 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); |
9492 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); |
8196 %} |
9493 %} |
8197 ins_pipe( pipe_slow ); |
9494 ins_pipe( pipe_slow ); |
8198 %} |
9495 %} |
8199 |
9496 |
8200 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ |
9497 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ |
8201 predicate(n->as_Vector()->length() == 8); |
9498 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); |
8202 match(Set dst (URShiftVS dst shift)); |
9499 match(Set dst (URShiftVS dst shift)); |
8203 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} |
9500 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} |
8204 ins_encode %{ |
9501 ins_encode %{ |
8205 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); |
9502 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); |
8206 %} |
9503 %} |
8207 ins_pipe( pipe_slow ); |
9504 ins_pipe( pipe_slow ); |
8208 %} |
9505 %} |
8209 |
9506 |
8210 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ |
9507 instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ |
8211 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
9508 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); |
8212 match(Set dst (URShiftVS src shift)); |
9509 match(Set dst (URShiftVS src shift)); |
8213 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} |
9510 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} |
8214 ins_encode %{ |
9511 ins_encode %{ |
8215 int vector_len = 0; |
9512 int vector_len = 0; |
8216 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
9513 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8217 %} |
9514 %} |
8218 ins_pipe( pipe_slow ); |
9515 ins_pipe( pipe_slow ); |
8219 %} |
9516 %} |
8220 |
9517 |
8221 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ |
9518 instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ |
8222 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
9519 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); |
8223 match(Set dst (URShiftVS src shift)); |
9520 match(Set dst (URShiftVS src shift)); |
8224 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} |
9521 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} |
8225 ins_encode %{ |
9522 ins_encode %{ |
8226 int vector_len = 0; |
9523 int vector_len = 0; |
|
9524 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
9525 %} |
|
9526 ins_pipe( pipe_slow ); |
|
9527 %} |
|
9528 |
|
9529 instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ |
|
9530 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); |
|
9531 match(Set dst (URShiftVS dst shift)); |
|
9532 effect(TEMP src); |
|
9533 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} |
|
9534 ins_encode %{ |
|
9535 int vector_len = 0; |
|
9536 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
9537 %} |
|
9538 ins_pipe( pipe_slow ); |
|
9539 %} |
|
9540 |
|
9541 instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ |
|
9542 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); |
|
9543 match(Set dst (URShiftVS src shift)); |
|
9544 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} |
|
9545 ins_encode %{ |
|
9546 int vector_len = 0; |
8227 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
9547 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8228 %} |
9548 %} |
8229 ins_pipe( pipe_slow ); |
9549 ins_pipe( pipe_slow ); |
8230 %} |
9550 %} |
8231 |
9551 |
8232 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ |
9552 instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ |
8233 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
9553 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); |
|
9554 match(Set dst (URShiftVS src shift)); |
|
9555 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} |
|
9556 ins_encode %{ |
|
9557 int vector_len = 0; |
|
9558 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
9559 %} |
|
9560 ins_pipe( pipe_slow ); |
|
9561 %} |
|
9562 |
|
9563 instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ |
|
9564 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); |
|
9565 match(Set dst (URShiftVS dst shift)); |
|
9566 effect(TEMP src); |
|
9567 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} |
|
9568 ins_encode %{ |
|
9569 int vector_len = 0; |
|
9570 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
9571 %} |
|
9572 ins_pipe( pipe_slow ); |
|
9573 %} |
|
9574 |
|
9575 instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{ |
|
9576 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); |
8234 match(Set dst (URShiftVS src shift)); |
9577 match(Set dst (URShiftVS src shift)); |
8235 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} |
9578 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} |
8236 ins_encode %{ |
9579 ins_encode %{ |
8237 int vector_len = 1; |
9580 int vector_len = 1; |
8238 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
9581 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8239 %} |
9582 %} |
8240 ins_pipe( pipe_slow ); |
9583 ins_pipe( pipe_slow ); |
8241 %} |
9584 %} |
8242 |
9585 |
8243 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ |
9586 instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ |
8244 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
9587 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); |
8245 match(Set dst (URShiftVS src shift)); |
9588 match(Set dst (URShiftVS src shift)); |
8246 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} |
9589 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} |
8247 ins_encode %{ |
9590 ins_encode %{ |
8248 int vector_len = 1; |
9591 int vector_len = 1; |
|
9592 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
9593 %} |
|
9594 ins_pipe( pipe_slow ); |
|
9595 %} |
|
9596 |
|
9597 instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ |
|
9598 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); |
|
9599 match(Set dst (URShiftVS dst shift)); |
|
9600 effect(TEMP src); |
|
9601 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} |
|
9602 ins_encode %{ |
|
9603 int vector_len = 1; |
|
9604 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
9605 %} |
|
9606 ins_pipe( pipe_slow ); |
|
9607 %} |
|
9608 |
|
9609 instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ |
|
9610 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); |
|
9611 match(Set dst (URShiftVS src shift)); |
|
9612 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} |
|
9613 ins_encode %{ |
|
9614 int vector_len = 1; |
8249 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
9615 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8250 %} |
9616 %} |
8251 ins_pipe( pipe_slow ); |
9617 ins_pipe( pipe_slow ); |
8252 %} |
9618 %} |
8253 |
9619 |
|
9620 instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ |
|
9621 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); |
|
9622 match(Set dst (URShiftVS src shift)); |
|
9623 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} |
|
9624 ins_encode %{ |
|
9625 int vector_len = 1; |
|
9626 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
9627 %} |
|
9628 ins_pipe( pipe_slow ); |
|
9629 %} |
|
9630 |
|
9631 instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ |
|
9632 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); |
|
9633 match(Set dst (URShiftVS dst shift)); |
|
9634 effect(TEMP src); |
|
9635 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} |
|
9636 ins_encode %{ |
|
9637 int vector_len = 1; |
|
9638 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
9639 %} |
|
9640 ins_pipe( pipe_slow ); |
|
9641 %} |
|
9642 |
8254 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ |
9643 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ |
8255 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
9644 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); |
8256 match(Set dst (URShiftVS src shift)); |
9645 match(Set dst (URShiftVS src shift)); |
8257 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} |
9646 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} |
8258 ins_encode %{ |
9647 ins_encode %{ |
8259 int vector_len = 2; |
9648 int vector_len = 2; |
8260 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
9649 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8261 %} |
9650 %} |
8262 ins_pipe( pipe_slow ); |
9651 ins_pipe( pipe_slow ); |
8263 %} |
9652 %} |
8264 |
9653 |
8265 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
9654 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
8266 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
9655 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); |
8267 match(Set dst (URShiftVS src shift)); |
9656 match(Set dst (URShiftVS src shift)); |
8268 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} |
9657 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} |
8269 ins_encode %{ |
9658 ins_encode %{ |
8270 int vector_len = 2; |
9659 int vector_len = 2; |
8271 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
9660 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8510 __ psraw($dst$$XMMRegister, (int)$shift$$constant); |
9899 __ psraw($dst$$XMMRegister, (int)$shift$$constant); |
8511 %} |
9900 %} |
8512 ins_pipe( pipe_slow ); |
9901 ins_pipe( pipe_slow ); |
8513 %} |
9902 %} |
8514 |
9903 |
8515 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ |
9904 instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{ |
8516 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
9905 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); |
8517 match(Set dst (RShiftVS src shift)); |
9906 match(Set dst (RShiftVS src shift)); |
8518 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} |
9907 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} |
8519 ins_encode %{ |
9908 ins_encode %{ |
8520 int vector_len = 0; |
9909 int vector_len = 0; |
8521 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
9910 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8522 %} |
9911 %} |
8523 ins_pipe( pipe_slow ); |
9912 ins_pipe( pipe_slow ); |
8524 %} |
9913 %} |
8525 |
9914 |
8526 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ |
9915 instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{ |
8527 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); |
9916 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); |
8528 match(Set dst (RShiftVS src shift)); |
9917 match(Set dst (RShiftVS src shift)); |
8529 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} |
9918 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} |
8530 ins_encode %{ |
9919 ins_encode %{ |
8531 int vector_len = 0; |
9920 int vector_len = 0; |
|
9921 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
9922 %} |
|
9923 ins_pipe( pipe_slow ); |
|
9924 %} |
|
9925 |
|
9926 instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ |
|
9927 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); |
|
9928 match(Set dst (RShiftVS dst shift)); |
|
9929 effect(TEMP src); |
|
9930 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} |
|
9931 ins_encode %{ |
|
9932 int vector_len = 0; |
|
9933 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
9934 %} |
|
9935 ins_pipe( pipe_slow ); |
|
9936 %} |
|
9937 |
|
9938 instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ |
|
9939 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); |
|
9940 match(Set dst (RShiftVS src shift)); |
|
9941 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} |
|
9942 ins_encode %{ |
|
9943 int vector_len = 0; |
8532 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
9944 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8533 %} |
9945 %} |
8534 ins_pipe( pipe_slow ); |
9946 ins_pipe( pipe_slow ); |
8535 %} |
9947 %} |
8536 |
9948 |
|
9949 instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ |
|
9950 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); |
|
9951 match(Set dst (RShiftVS src shift)); |
|
9952 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} |
|
9953 ins_encode %{ |
|
9954 int vector_len = 0; |
|
9955 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
9956 %} |
|
9957 ins_pipe( pipe_slow ); |
|
9958 %} |
|
9959 |
|
9960 instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ |
|
9961 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); |
|
9962 match(Set dst (RShiftVS dst shift)); |
|
9963 effect(TEMP src); |
|
9964 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} |
|
9965 ins_encode %{ |
|
9966 int vector_len = 0; |
|
9967 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
9968 %} |
|
9969 ins_pipe( pipe_slow ); |
|
9970 %} |
|
9971 |
8537 instruct vsra4S(vecD dst, vecS shift) %{ |
9972 instruct vsra4S(vecD dst, vecS shift) %{ |
8538 predicate(n->as_Vector()->length() == 4); |
9973 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); |
8539 match(Set dst (RShiftVS dst shift)); |
9974 match(Set dst (RShiftVS dst shift)); |
8540 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} |
9975 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} |
8541 ins_encode %{ |
9976 ins_encode %{ |
8542 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); |
9977 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); |
8543 %} |
9978 %} |
8544 ins_pipe( pipe_slow ); |
9979 ins_pipe( pipe_slow ); |
8545 %} |
9980 %} |
8546 |
9981 |
8547 instruct vsra4S_imm(vecD dst, immI8 shift) %{ |
9982 instruct vsra4S_imm(vecD dst, immI8 shift) %{ |
8548 predicate(n->as_Vector()->length() == 4); |
9983 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); |
8549 match(Set dst (RShiftVS dst shift)); |
9984 match(Set dst (RShiftVS dst shift)); |
8550 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} |
9985 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} |
8551 ins_encode %{ |
9986 ins_encode %{ |
8552 __ psraw($dst$$XMMRegister, (int)$shift$$constant); |
9987 __ psraw($dst$$XMMRegister, (int)$shift$$constant); |
8553 %} |
9988 %} |
8554 ins_pipe( pipe_slow ); |
9989 ins_pipe( pipe_slow ); |
8555 %} |
9990 %} |
8556 |
9991 |
8557 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ |
9992 instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{ |
8558 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
9993 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); |
8559 match(Set dst (RShiftVS src shift)); |
9994 match(Set dst (RShiftVS src shift)); |
8560 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} |
9995 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} |
8561 ins_encode %{ |
9996 ins_encode %{ |
8562 int vector_len = 0; |
9997 int vector_len = 0; |
8563 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
9998 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8564 %} |
9999 %} |
8565 ins_pipe( pipe_slow ); |
10000 ins_pipe( pipe_slow ); |
8566 %} |
10001 %} |
8567 |
10002 |
8568 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ |
10003 instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{ |
8569 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); |
10004 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); |
8570 match(Set dst (RShiftVS src shift)); |
10005 match(Set dst (RShiftVS src shift)); |
8571 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} |
10006 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} |
8572 ins_encode %{ |
10007 ins_encode %{ |
8573 int vector_len = 0; |
10008 int vector_len = 0; |
|
10009 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
10010 %} |
|
10011 ins_pipe( pipe_slow ); |
|
10012 %} |
|
10013 |
|
10014 instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ |
|
10015 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); |
|
10016 match(Set dst (RShiftVS dst shift)); |
|
10017 effect(TEMP src); |
|
10018 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} |
|
10019 ins_encode %{ |
|
10020 int vector_len = 0; |
|
10021 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
10022 %} |
|
10023 ins_pipe( pipe_slow ); |
|
10024 %} |
|
10025 |
|
10026 instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ |
|
10027 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); |
|
10028 match(Set dst (RShiftVS src shift)); |
|
10029 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} |
|
10030 ins_encode %{ |
|
10031 int vector_len = 0; |
8574 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
10032 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8575 %} |
10033 %} |
8576 ins_pipe( pipe_slow ); |
10034 ins_pipe( pipe_slow ); |
8577 %} |
10035 %} |
8578 |
10036 |
|
10037 instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ |
|
10038 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); |
|
10039 match(Set dst (RShiftVS src shift)); |
|
10040 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} |
|
10041 ins_encode %{ |
|
10042 int vector_len = 0; |
|
10043 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
10044 %} |
|
10045 ins_pipe( pipe_slow ); |
|
10046 %} |
|
10047 |
|
10048 instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ |
|
10049 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); |
|
10050 match(Set dst (RShiftVS dst shift)); |
|
10051 effect(TEMP src); |
|
10052 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} |
|
10053 ins_encode %{ |
|
10054 int vector_len = 0; |
|
10055 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
10056 %} |
|
10057 ins_pipe( pipe_slow ); |
|
10058 %} |
|
10059 |
8579 instruct vsra8S(vecX dst, vecS shift) %{ |
10060 instruct vsra8S(vecX dst, vecS shift) %{ |
8580 predicate(n->as_Vector()->length() == 8); |
10061 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); |
8581 match(Set dst (RShiftVS dst shift)); |
10062 match(Set dst (RShiftVS dst shift)); |
8582 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} |
10063 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} |
8583 ins_encode %{ |
10064 ins_encode %{ |
8584 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); |
10065 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); |
8585 %} |
10066 %} |
8586 ins_pipe( pipe_slow ); |
10067 ins_pipe( pipe_slow ); |
8587 %} |
10068 %} |
8588 |
10069 |
8589 instruct vsra8S_imm(vecX dst, immI8 shift) %{ |
10070 instruct vsra8S_imm(vecX dst, immI8 shift) %{ |
8590 predicate(n->as_Vector()->length() == 8); |
10071 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); |
8591 match(Set dst (RShiftVS dst shift)); |
10072 match(Set dst (RShiftVS dst shift)); |
8592 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} |
10073 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} |
8593 ins_encode %{ |
10074 ins_encode %{ |
8594 __ psraw($dst$$XMMRegister, (int)$shift$$constant); |
10075 __ psraw($dst$$XMMRegister, (int)$shift$$constant); |
8595 %} |
10076 %} |
8596 ins_pipe( pipe_slow ); |
10077 ins_pipe( pipe_slow ); |
8597 %} |
10078 %} |
8598 |
10079 |
8599 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ |
10080 instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{ |
8600 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
10081 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); |
8601 match(Set dst (RShiftVS src shift)); |
10082 match(Set dst (RShiftVS src shift)); |
8602 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} |
10083 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} |
8603 ins_encode %{ |
10084 ins_encode %{ |
8604 int vector_len = 0; |
10085 int vector_len = 0; |
8605 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
10086 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8606 %} |
10087 %} |
8607 ins_pipe( pipe_slow ); |
10088 ins_pipe( pipe_slow ); |
8608 %} |
10089 %} |
8609 |
10090 |
8610 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ |
10091 instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{ |
8611 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); |
10092 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); |
8612 match(Set dst (RShiftVS src shift)); |
10093 match(Set dst (RShiftVS src shift)); |
8613 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} |
10094 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} |
8614 ins_encode %{ |
10095 ins_encode %{ |
8615 int vector_len = 0; |
10096 int vector_len = 0; |
|
10097 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
10098 %} |
|
10099 ins_pipe( pipe_slow ); |
|
10100 %} |
|
10101 |
|
10102 instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ |
|
10103 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); |
|
10104 match(Set dst (RShiftVS dst shift)); |
|
10105 effect(TEMP src); |
|
10106 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} |
|
10107 ins_encode %{ |
|
10108 int vector_len = 0; |
|
10109 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
10110 %} |
|
10111 ins_pipe( pipe_slow ); |
|
10112 %} |
|
10113 |
|
10114 instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ |
|
10115 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); |
|
10116 match(Set dst (RShiftVS src shift)); |
|
10117 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} |
|
10118 ins_encode %{ |
|
10119 int vector_len = 0; |
8616 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
10120 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8617 %} |
10121 %} |
8618 ins_pipe( pipe_slow ); |
10122 ins_pipe( pipe_slow ); |
8619 %} |
10123 %} |
8620 |
10124 |
8621 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ |
10125 instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ |
8622 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
10126 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); |
|
10127 match(Set dst (RShiftVS src shift)); |
|
10128 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} |
|
10129 ins_encode %{ |
|
10130 int vector_len = 0; |
|
10131 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
10132 %} |
|
10133 ins_pipe( pipe_slow ); |
|
10134 %} |
|
10135 |
|
10136 instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ |
|
10137 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); |
|
10138 match(Set dst (RShiftVS dst shift)); |
|
10139 effect(TEMP src); |
|
10140 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} |
|
10141 ins_encode %{ |
|
10142 int vector_len = 0; |
|
10143 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
10144 %} |
|
10145 ins_pipe( pipe_slow ); |
|
10146 %} |
|
10147 |
|
10148 instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{ |
|
10149 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); |
8623 match(Set dst (RShiftVS src shift)); |
10150 match(Set dst (RShiftVS src shift)); |
8624 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} |
10151 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} |
8625 ins_encode %{ |
10152 ins_encode %{ |
8626 int vector_len = 1; |
10153 int vector_len = 1; |
8627 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
10154 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8628 %} |
10155 %} |
8629 ins_pipe( pipe_slow ); |
10156 ins_pipe( pipe_slow ); |
8630 %} |
10157 %} |
8631 |
10158 |
8632 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ |
10159 instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{ |
8633 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); |
10160 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); |
8634 match(Set dst (RShiftVS src shift)); |
10161 match(Set dst (RShiftVS src shift)); |
8635 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} |
10162 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} |
8636 ins_encode %{ |
10163 ins_encode %{ |
8637 int vector_len = 1; |
10164 int vector_len = 1; |
|
10165 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
10166 %} |
|
10167 ins_pipe( pipe_slow ); |
|
10168 %} |
|
10169 |
|
10170 instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ |
|
10171 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); |
|
10172 match(Set dst (RShiftVS dst shift)); |
|
10173 effect(TEMP src); |
|
10174 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} |
|
10175 ins_encode %{ |
|
10176 int vector_len = 1; |
|
10177 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
|
10178 %} |
|
10179 ins_pipe( pipe_slow ); |
|
10180 %} |
|
10181 |
|
10182 instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ |
|
10183 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); |
|
10184 match(Set dst (RShiftVS src shift)); |
|
10185 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} |
|
10186 ins_encode %{ |
|
10187 int vector_len = 1; |
8638 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
10188 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
8639 %} |
10189 %} |
8640 ins_pipe( pipe_slow ); |
10190 ins_pipe( pipe_slow ); |
8641 %} |
10191 %} |
8642 |
10192 |
|
10193 instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ |
|
10194 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); |
|
10195 match(Set dst (RShiftVS src shift)); |
|
10196 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} |
|
10197 ins_encode %{ |
|
10198 int vector_len = 1; |
|
10199 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
10200 %} |
|
10201 ins_pipe( pipe_slow ); |
|
10202 %} |
|
10203 |
|
10204 instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ |
|
10205 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); |
|
10206 match(Set dst (RShiftVS dst shift)); |
|
10207 effect(TEMP src); |
|
10208 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} |
|
10209 ins_encode %{ |
|
10210 int vector_len = 1; |
|
10211 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
|
10212 %} |
|
10213 ins_pipe( pipe_slow ); |
|
10214 %} |
|
10215 |
8643 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ |
10216 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ |
8644 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
10217 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); |
8645 match(Set dst (RShiftVS src shift)); |
10218 match(Set dst (RShiftVS src shift)); |
8646 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} |
10219 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} |
8647 ins_encode %{ |
10220 ins_encode %{ |
8648 int vector_len = 2; |
10221 int vector_len = 2; |
8649 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
10222 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); |
8650 %} |
10223 %} |
8651 ins_pipe( pipe_slow ); |
10224 ins_pipe( pipe_slow ); |
8652 %} |
10225 %} |
8653 |
10226 |
8654 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
10227 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ |
8655 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); |
10228 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); |
8656 match(Set dst (RShiftVS src shift)); |
10229 match(Set dst (RShiftVS src shift)); |
8657 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} |
10230 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} |
8658 ins_encode %{ |
10231 ins_encode %{ |
8659 int vector_len = 2; |
10232 int vector_len = 2; |
8660 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |
10233 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); |