src/hotspot/cpu/x86/x86.ad
branch:    datagramsocketimpl-branch
changeset: 58678:9cf78a70fa4f
parent:    55061:d1fa0f8d8c9a
child:     58679:9c3209ff7550
comparison: 58677:13588c901957 vs 58678:9cf78a70fa4f
@@ -1095 +1095 @@
                        );

 reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

-reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
-reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
-reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);
-
-reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d);
-reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h);
-reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p);
-
-reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d);
-reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h);
-reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p);
-
-reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d);
-reg_class ymm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h);
-reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p);
-
-reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d);
-reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h);
-reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p);
-
-reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d);
-reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h);
-reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p);
-
-reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d);
-reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h);
-reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p);
-
-reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d);
-reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h);
-reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p);
-
-#ifdef _LP64
-
-reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d);
-reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h);
-reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p);
-
-reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d);
-reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h);
-reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p);
-
-reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d);
-reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h);
-reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p);
-
-reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d);
-reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h);
-reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p);
-
-reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d);
-reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h);
-reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p);
-
-reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d);
-reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h);
-reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p);
-
-reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d);
-reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h);
-reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p);
-
-reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d);
-reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
-reg_class zmm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
-
-reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d);
-reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h);
-reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p);
-
-reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d);
-reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h);
-reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p);
-
-reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d);
-reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h);
-reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p);
-
-reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d);
-reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h);
-reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p);
-
-reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d);
-reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h);
-reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p);
-
-reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d);
-reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h);
-reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p);
-
-reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d);
-reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h);
-reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p);
-
-reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d);
-reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h);
-reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p);
-
-reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d);
-reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h);
-reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p);
-
-reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d);
-reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h);
-reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p);
-
-reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d);
-reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h);
-reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p);
-
-reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d);
-reg_class ymm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
-reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);
-
-reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
-reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
-reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);
-
-reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
-reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
-reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);
-
-reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
-reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
-reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);
-
-reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
-reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
-reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
-
-#endif
-
 %}


 //----------SOURCE BLOCK-------------------------------------------------------
 // This is a block of C++ code which provides values, functions, and
@@ -1410 +1278 @@
       break;
     case Op_AbsVB:
     case Op_AbsVS:
     case Op_AbsVI:
     case Op_AddReductionVI:
-      if (UseSSE < 3) // requires at least SSE3
+      if (UseSSE < 3 || !VM_Version::supports_ssse3()) // requires at least SSSE3
         ret_value = false;
       break;
     case Op_MulReductionVI:
       if (UseSSE < 4) // requires at least SSE4
         ret_value = false;
@@ -1476 +1344 @@
     case Op_MinF:
       if (UseAVX < 1) // enabled for AVX only
         ret_value = false;
       break;
 #endif
+    case Op_CacheWB:
+    case Op_CacheWBPreSync:
+    case Op_CacheWBPostSync:
+      if (!VM_Version::supports_data_cache_line_flush()) {
+        ret_value = false;
+      }
+      break;
+    case Op_RoundDoubleMode:
+      if (UseSSE < 4)
+         ret_value = false;
+      break;
   }

   return ret_value;  // Per default match rules are supported.
 }

@@ -1526 +1405 @@
           ret_value = false;
         break;
       case Op_CMoveVD:
         if (vlen != 4)
           ret_value  = false;
+        break;
+      case Op_RoundDoubleModeV:
+        if (VM_Version::supports_avx() == false)
+          ret_value = false;
         break;
     }
   }

   return ret_value;  // Per default match rules are supported.
@@ -1783 +1666 @@
   }
   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
   return (UseAVX > 2) ? 6 : 4;
 }

-static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
-                            int stack_offset, int reg, uint ireg, outputStream* st) {
+int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+                     int stack_offset, int reg, uint ireg, outputStream* st) {
   // In 64-bit VM size calculation is very complex. Emitting instructions
   // into scratch buffer is used to get size in 64-bit VM.
   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
   if (cbuf) {
     MacroAssembler _masm(cbuf);
@@ -2088 +1971 @@

 instruct ShouldNotReachHere() %{
   match(Halt);
   format %{ "ud2\t# ShouldNotReachHere" %}
   ins_encode %{
-    __ ud2();
+    __ stop(_halt_reason);
   %}
   ins_pipe(pipe_slow);
 %}

 // =================================EVEX special===============================
@@ -2844 +2727 @@
   ins_encode %{
     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
   %}
   ins_pipe(pipe_slow);
 %}
+
+
+#ifdef _LP64
+instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
+  predicate(UseSSE>=4);
+  match(Set dst (RoundDoubleMode src rmode));
+  format %{ "roundsd  $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{
+  predicate(UseSSE>=4);
+  match(Set dst (RoundDoubleMode (LoadD src) rmode));
+  format %{ "roundsd  $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{
+  predicate(UseSSE>=4);
+  match(Set dst (RoundDoubleMode con rmode));
+  effect(TEMP scratch_reg);
+  format %{ "roundsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vround2D_reg(legVecX dst, legVecX src, immU8 rmode) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RoundDoubleModeV src rmode));
+  format %{ "vroundpd  $dst, $src, $rmode\t! round packed2D" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vround2D_mem(legVecX dst, memory mem, immU8 rmode) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
+  format %{ "vroundpd $dst, $mem, $rmode\t! round packed2D" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vround4D_reg(legVecY dst, legVecY src, legVecY rmode) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RoundDoubleModeV src rmode));
+  format %{ "vroundpd  $dst, $src, $rmode\t! round packed4D" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vround4D_mem(legVecY dst, memory mem, immU8 rmode) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
+  format %{ "vroundpd $dst, $mem, $rmode\t! round packed4D" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+
+instruct vround8D_reg(vecZ dst, vecZ src, immU8 rmode) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+  match(Set dst (RoundDoubleModeV src rmode));
+  format %{ "vrndscalepd $dst, $src, $rmode\t! round packed8D" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vround8D_mem(vecZ dst, memory mem, immU8 rmode) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
+  format %{ "vrndscalepd $dst, $mem, $rmode\t! round packed8D" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+#endif // _LP64

 instruct onspinwait() %{
   match(OnSpinWait);
   ins_cost(200);

@@ -3740 +3727 @@
   %}
   ins_pipe( pipe_slow );
 %}

 instruct Repl2F_zero(vecD dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
+  predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateF zero));
   format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
   ins_encode %{
     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( fpu_reg_reg );
 %}

 instruct Repl4F_zero(vecX dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
+  predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateF zero));
   format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
   ins_encode %{
     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( fpu_reg_reg );
 %}

 instruct Repl8F_zero(vecY dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 0);
   match(Set dst (ReplicateF zero));
   format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
   ins_encode %{
     int vector_len = 1;
     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
@@ -3834 +3821 @@
   ins_pipe( pipe_slow );
 %}

 // Replicate double (8 byte) scalar zero to be vector
 instruct Repl2D_zero(vecX dst, immD0 zero) %{
-  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
+  predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateD zero));
   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
   ins_encode %{
     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( fpu_reg_reg );
 %}

 instruct Repl4D_zero(vecY dst, immD0 zero) %{
-  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 0);
   match(Set dst (ReplicateD zero));
   format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
   ins_encode %{
     int vector_len = 1;
     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
@@ -4769 +4756 @@
     __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}

-instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
-  match(Set dst (ReplicateF zero));
-  format %{ "vpxor  $dst k0,$dst,$dst\t! replicate2F zero" %}
-  ins_encode %{
-    // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
-  match(Set dst (ReplicateF zero));
-  format %{ "vpxor  $dst k0,$dst,$dst\t! replicate4F zero" %}
-  ins_encode %{
-    // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
-  match(Set dst (ReplicateF zero));
-  format %{ "vpxor  $dst k0,$dst,$dst\t! replicate8F zero" %}
-  ins_encode %{
-    // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
   match(Set dst (ReplicateF zero));
   format %{ "vpxor  $dst k0,$dst,$dst\t! replicate16F zero" %}
   ins_encode %{
@@ -4859 +4810 @@
   ins_encode %{
     int vector_len = 2;
     __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
-%}
-
-instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
-  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
-  match(Set dst (ReplicateD zero));
-  format %{ "vpxor  $dst k0,$dst,$dst\t! replicate2D zero" %}
-  ins_encode %{
-    // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
-  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
-  match(Set dst (ReplicateD zero));
-  format %{ "vpxor  $dst k0,$dst,$dst\t! replicate4D zero" %}
-  ins_encode %{
-    // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
 %}

 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
   match(Set dst (ReplicateD zero));
@@ -8356 +8283 @@
 instruct vshift4B(vecS dst, vecS src, vecS shift, vecS tmp, rRegI scratch) %{
   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVB src shift));
   match(Set dst (RShiftVB src shift));
   match(Set dst (URShiftVB src shift));
-  effect(TEMP dst, TEMP tmp, TEMP scratch);
+  effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
   format %{"vextendbw $tmp,$src\n\t"
            "vshiftw   $tmp,$shift\n\t"
            "movdqu    $dst,[0x00ff00ff0x00ff00ff]\n\t"
            "pand      $dst,$tmp\n\t"
            "packuswb  $dst,$dst\n\t ! packed4B shift" %}
@@ -8379 +8306 @@
 instruct vshift8B(vecD dst, vecD src, vecS shift, vecD tmp, rRegI scratch) %{
   predicate(UseSSE > 3 && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVB src shift));
   match(Set dst (RShiftVB src shift));
   match(Set dst (URShiftVB src shift));
-  effect(TEMP dst, TEMP tmp, TEMP scratch);
+  effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
   format %{"vextendbw $tmp,$src\n\t"
            "vshiftw   $tmp,$shift\n\t"
            "movdqu    $dst,[0x00ff00ff0x00ff00ff]\n\t"
            "pand      $dst,$tmp\n\t"
            "packuswb  $dst,$dst\n\t ! packed8B shift" %}
@@ -8402 +8329 @@
 instruct vshift16B(vecX dst, vecX src, vecS shift, vecX tmp1, vecX tmp2, rRegI scratch) %{
   predicate(UseSSE > 3  && UseAVX <= 1 && n->as_Vector()->length() == 16);
   match(Set dst (LShiftVB src shift));
   match(Set dst (RShiftVB src shift));
   match(Set dst (URShiftVB src shift));
-  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
+  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch);
   format %{"vextendbw $tmp1,$src\n\t"
            "vshiftw   $tmp1,$shift\n\t"
            "pshufd    $tmp2,$src\n\t"
            "vextendbw $tmp2,$tmp2\n\t"
            "vshiftw   $tmp2,$shift\n\t"
@@ -8433 +8360 @@
 instruct vshift16B_avx(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
   match(Set dst (LShiftVB src shift));
   match(Set dst (RShiftVB src shift));
   match(Set dst (URShiftVB src shift));
-  effect(TEMP dst, TEMP tmp, TEMP scratch);
+  effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
   format %{"vextendbw  $tmp,$src\n\t"
            "vshiftw    $tmp,$tmp,$shift\n\t"
            "vpand      $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t"
            "vextracti128_high  $dst,$tmp\n\t"
            "vpackuswb  $dst,$tmp,$dst\n\t! packed16B shift" %}
@@ -8457 +8384 @@
 instruct vshift32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
   match(Set dst (LShiftVB src shift));
   match(Set dst (RShiftVB src shift));
   match(Set dst (URShiftVB src shift));
-  effect(TEMP dst, TEMP tmp, TEMP scratch);
+  effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
   format %{"vextracti128_high  $tmp,$src\n\t"
            "vextendbw  $tmp,$tmp\n\t"
            "vextendbw  $dst,$src\n\t"
            "vshiftw    $tmp,$tmp,$shift\n\t"
            "vshiftw    $dst,$dst,$shift\n\t"
@@ -8489 +8416 @@
 instruct vshift64B_avx(vecZ dst, vecZ src, vecS shift, vecZ tmp1, vecZ tmp2, rRegI scratch) %{
   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
   match(Set dst (LShiftVB src shift));
   match(Set dst (RShiftVB src shift));
   match(Set dst (URShiftVB src shift));
-  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
+  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch);
   format %{"vextracti64x4  $tmp1,$src\n\t"
            "vextendbw      $tmp1,$tmp1\n\t"
            "vextendbw      $tmp2,$src\n\t"
            "vshiftw        $tmp1,$tmp1,$shift\n\t"
            "vshiftw        $tmp2,$tmp2,$shift\n\t"
@@ -8532 +8459 @@
 instruct vshist2S(vecS dst, vecS src, vecS shift) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (LShiftVS src shift));
   match(Set dst (RShiftVS src shift));
   match(Set dst (URShiftVS src shift));
+  effect(TEMP dst, USE src, USE shift);
   format %{ "vshiftw  $dst,$src,$shift\t! shift packed2S" %}
   ins_encode %{
     int opcode = this->as_Mach()->ideal_Opcode();
     if (UseAVX == 0) {
       if ($dst$$XMMRegister != $src$$XMMRegister)
@@ -8552 +8480 @@
 instruct vshift4S(vecD dst, vecD src, vecS shift) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (LShiftVS src shift));
   match(Set dst (RShiftVS src shift));
   match(Set dst (URShiftVS src shift));
+  effect(TEMP dst, USE src, USE shift);
   format %{ "vshiftw  $dst,$src,$shift\t! shift packed4S" %}
   ins_encode %{
     int opcode = this->as_Mach()->ideal_Opcode();
     if (UseAVX == 0) {
       if ($dst$$XMMRegister != $src$$XMMRegister)
@@ -8573 +8502 @@
 instruct vshift8S(vecX dst, vecX src, vecS shift) %{
   predicate(n->as_Vector()->length() == 8);
   match(Set dst (LShiftVS src shift));
   match(Set dst (RShiftVS src shift));
   match(Set dst (URShiftVS src shift));
+  effect(TEMP dst, USE src, USE shift);
   format %{ "vshiftw  $dst,$src,$shift\t! shift packed8S" %}
   ins_encode %{
     int opcode = this->as_Mach()->ideal_Opcode();
     if (UseAVX == 0) {
       if ($dst$$XMMRegister != $src$$XMMRegister)
@@ -8593 +8523 @@
 instruct vshift16S(vecY dst, vecY src, vecS shift) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
   match(Set dst (LShiftVS src shift));
   match(Set dst (RShiftVS src shift));
   match(Set dst (URShiftVS src shift));
+  effect(DEF dst, USE src, USE shift);
   format %{ "vshiftw  $dst,$src,$shift\t! shift packed16S" %}
   ins_encode %{
     int vector_len = 1;
     int opcode = this->as_Mach()->ideal_Opcode();
     __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
@@ -8607 +8538 @@
 instruct vshift32S(vecZ dst, vecZ src, vecS shift) %{
   predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (LShiftVS src shift));
   match(Set dst (RShiftVS src shift));
   match(Set dst (URShiftVS src shift));
+  effect(DEF dst, USE src, USE shift);
   format %{ "vshiftw  $dst,$src,$shift\t! shift packed32S" %}
   ins_encode %{
     int vector_len = 2;
     int opcode = this->as_Mach()->ideal_Opcode();
     __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
@@ -8622 +8554 @@
 instruct vshift2I(vecD dst, vecD src, vecS shift) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (LShiftVI src shift));
   match(Set dst (RShiftVI src shift));
   match(Set dst (URShiftVI src shift));
+  effect(TEMP dst, USE src, USE shift);
   format %{ "vshiftd  $dst,$src,$shift\t! shift packed2I" %}
   ins_encode %{
     int opcode = this->as_Mach()->ideal_Opcode();
     if (UseAVX == 0) {
       if ($dst$$XMMRegister != $src$$XMMRegister)
@@ -8642 +8575 @@
 instruct vshift4I(vecX dst, vecX src, vecS shift) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (LShiftVI src shift));
   match(Set dst (RShiftVI src shift));
   match(Set dst (URShiftVI src shift));
+  effect(TEMP dst, USE src, USE shift);
   format %{ "vshiftd  $dst,$src,$shift\t! shift packed4I" %}
   ins_encode %{
     int opcode = this->as_Mach()->ideal_Opcode();
     if (UseAVX == 0) {
       if ($dst$$XMMRegister != $src$$XMMRegister)
@@ -8662 +8596 @@
 instruct vshift8I(vecY dst, vecY src, vecS shift) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVI src shift));
   match(Set dst (RShiftVI src shift));
   match(Set dst (URShiftVI src shift));
+  effect(DEF dst, USE src, USE shift);
   format %{ "vshiftd  $dst,$src,$shift\t! shift packed8I" %}
   ins_encode %{
     int vector_len = 1;
     int opcode = this->as_Mach()->ideal_Opcode();
     __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
@@ -8676 +8611 @@
 instruct vshift16I(vecZ dst, vecZ src, vecS shift) %{
   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
   match(Set dst (LShiftVI src shift));
   match(Set dst (RShiftVI src shift));
   match(Set dst (URShiftVI src shift));
+  effect(DEF dst, USE src, USE shift);
   format %{ "vshiftd  $dst,$src,$shift\t! shift packed16I" %}
   ins_encode %{
     int vector_len = 2;
     int opcode = this->as_Mach()->ideal_Opcode();
     __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
@@ -8690 +8626 @@
 // Longs vector shift
 instruct vshift2L(vecX dst, vecX src, vecS shift) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (LShiftVL src shift));
   match(Set dst (URShiftVL src shift));
+  effect(TEMP dst, USE src, USE shift);
   format %{ "vshiftq  $dst,$src,$shift\t! shift packed2L" %}
   ins_encode %{
     int opcode = this->as_Mach()->ideal_Opcode();
     if (UseAVX == 0) {
       if ($dst$$XMMRegister != $src$$XMMRegister)
@@ -8709 +8646 @@

 instruct vshift4L(vecY dst, vecY src, vecS shift) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVL src shift));
   match(Set dst (URShiftVL src shift));
+  effect(DEF dst, USE src, USE shift);
   format %{ "vshiftq  $dst,$src,$shift\t! left shift packed4L" %}
   ins_encode %{
     int vector_len = 1;
     int opcode = this->as_Mach()->ideal_Opcode();
     __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
@@ -8723 +8661 @@
 instruct vshift8L(vecZ dst, vecZ src, vecS shift) %{
   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVL src shift));
   match(Set dst (RShiftVL src shift));
   match(Set dst (URShiftVL src shift));
+  effect(DEF dst, USE src, USE shift);
   format %{ "vshiftq  $dst,$src,$shift\t! shift packed8L" %}
   ins_encode %{
     int vector_len = 2;
     int opcode = this->as_Mach()->ideal_Opcode();
     __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);