src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
changeset 58462 c6f1226cfb72
parent 58103 689a80d20550
child 58679 9c3209ff7550
child 58977 c6a789f495fe
equal deleted inserted replaced
58461:26f0ed77734e 58462:c6f1226cfb72
  1286     Label L_loop;
  1286     Label L_loop;
  1287     __ align(OptoLoopAlignment);
  1287     __ align(OptoLoopAlignment);
  1288     if (UseUnalignedLoadStores) {
  1288     if (UseUnalignedLoadStores) {
  1289       Label L_end;
  1289       Label L_end;
  1290       // Copy 64-bytes per iteration
  1290       // Copy 64-bytes per iteration
  1291       __ BIND(L_loop);
       
  1292       if (UseAVX > 2) {
  1291       if (UseAVX > 2) {
       
  1292         Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold;
       
  1293 
       
  1294         __ BIND(L_copy_bytes);
       
  1295         __ cmpptr(qword_count, (-1 * AVX3Threshold / 8));
       
  1296         __ jccb(Assembler::less, L_above_threshold);
       
  1297         __ jmpb(L_below_threshold);
       
  1298 
       
  1299         __ bind(L_loop_avx512);
  1293         __ evmovdqul(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit);
  1300         __ evmovdqul(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit);
  1294         __ evmovdqul(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit);
  1301         __ evmovdqul(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit);
  1295       } else if (UseAVX == 2) {
  1302         __ bind(L_above_threshold);
       
  1303         __ addptr(qword_count, 8);
       
  1304         __ jcc(Assembler::lessEqual, L_loop_avx512);
       
  1305         __ jmpb(L_32_byte_head);
       
  1306 
       
  1307         __ bind(L_loop_avx2);
  1296         __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
  1308         __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
  1297         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
  1309         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
  1298         __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
  1310         __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
  1299         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
  1311         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
       
  1312         __ bind(L_below_threshold);
       
  1313         __ addptr(qword_count, 8);
       
  1314         __ jcc(Assembler::lessEqual, L_loop_avx2);
       
  1315 
       
  1316         __ bind(L_32_byte_head);
       
  1317         __ subptr(qword_count, 4);  // sub(8) and add(4)
       
  1318         __ jccb(Assembler::greater, L_end);
  1300       } else {
  1319       } else {
  1301         __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
  1320         __ BIND(L_loop);
  1302         __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
  1321         if (UseAVX == 2) {
  1303         __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
  1322           __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
  1304         __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
  1323           __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
  1305         __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
  1324           __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
  1306         __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
  1325           __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
  1307         __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
  1326         } else {
  1308         __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
  1327           __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
       
  1328           __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
       
  1329           __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
       
  1330           __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
       
  1331           __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
       
  1332           __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
       
  1333           __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
       
  1334           __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
       
  1335         }
       
  1336 
       
  1337         __ BIND(L_copy_bytes);
       
  1338         __ addptr(qword_count, 8);
       
  1339         __ jcc(Assembler::lessEqual, L_loop);
       
  1340         __ subptr(qword_count, 4);  // sub(8) and add(4)
       
  1341         __ jccb(Assembler::greater, L_end);
  1309       }
  1342       }
  1310       __ BIND(L_copy_bytes);
       
  1311       __ addptr(qword_count, 8);
       
  1312       __ jcc(Assembler::lessEqual, L_loop);
       
  1313       __ subptr(qword_count, 4);  // sub(8) and add(4)
       
  1314       __ jccb(Assembler::greater, L_end);
       
  1315       // Copy trailing 32 bytes
  1343       // Copy trailing 32 bytes
  1316       if (UseAVX >= 2) {
  1344       if (UseAVX >= 2) {
  1317         __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
  1345         __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
  1318         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
  1346         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
  1319       } else {
  1347       } else {
  1366     Label L_loop;
  1394     Label L_loop;
  1367     __ align(OptoLoopAlignment);
  1395     __ align(OptoLoopAlignment);
  1368     if (UseUnalignedLoadStores) {
  1396     if (UseUnalignedLoadStores) {
  1369       Label L_end;
  1397       Label L_end;
  1370       // Copy 64-bytes per iteration
  1398       // Copy 64-bytes per iteration
  1371       __ BIND(L_loop);
       
  1372       if (UseAVX > 2) {
  1399       if (UseAVX > 2) {
       
  1400         Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold;
       
  1401 
       
  1402         __ BIND(L_copy_bytes);
       
  1403         __ cmpptr(qword_count, (AVX3Threshold / 8));
       
  1404         __ jccb(Assembler::greater, L_above_threshold);
       
  1405         __ jmpb(L_below_threshold);
       
  1406 
       
  1407         __ BIND(L_loop_avx512);
  1373         __ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 0), Assembler::AVX_512bit);
  1408         __ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 0), Assembler::AVX_512bit);
  1374         __ evmovdqul(Address(dest, qword_count, Address::times_8, 0), xmm0, Assembler::AVX_512bit);
  1409         __ evmovdqul(Address(dest, qword_count, Address::times_8, 0), xmm0, Assembler::AVX_512bit);
  1375       } else if (UseAVX == 2) {
  1410         __ bind(L_above_threshold);
       
  1411         __ subptr(qword_count, 8);
       
  1412         __ jcc(Assembler::greaterEqual, L_loop_avx512);
       
  1413         __ jmpb(L_32_byte_head);
       
  1414 
       
  1415         __ bind(L_loop_avx2);
  1376         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
  1416         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
  1377         __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
  1417         __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
  1378         __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
  1418         __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
  1379         __ vmovdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
  1419         __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
       
  1420         __ bind(L_below_threshold);
       
  1421         __ subptr(qword_count, 8);
       
  1422         __ jcc(Assembler::greaterEqual, L_loop_avx2);
       
  1423 
       
  1424         __ bind(L_32_byte_head);
       
  1425         __ addptr(qword_count, 4);  // add(8) and sub(4)
       
  1426         __ jccb(Assembler::less, L_end);
  1380       } else {
  1427       } else {
  1381         __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
  1428         __ BIND(L_loop);
  1382         __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
  1429         if (UseAVX == 2) {
  1383         __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
  1430           __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
  1384         __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
  1431           __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
  1385         __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
  1432           __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
  1386         __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
  1433           __ vmovdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
  1387         __ movdqu(xmm3, Address(from, qword_count, Address::times_8,  0));
  1434         } else {
  1388         __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm3);
  1435           __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
       
  1436           __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
       
  1437           __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
       
  1438           __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
       
  1439           __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
       
  1440           __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
       
  1441           __ movdqu(xmm3, Address(from, qword_count, Address::times_8,  0));
       
  1442           __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm3);
       
  1443         }
       
  1444 
       
  1445         __ BIND(L_copy_bytes);
       
  1446         __ subptr(qword_count, 8);
       
  1447         __ jcc(Assembler::greaterEqual, L_loop);
       
  1448 
       
  1449         __ addptr(qword_count, 4);  // add(8) and sub(4)
       
  1450         __ jccb(Assembler::less, L_end);
  1389       }
  1451       }
  1390       __ BIND(L_copy_bytes);
       
  1391       __ subptr(qword_count, 8);
       
  1392       __ jcc(Assembler::greaterEqual, L_loop);
       
  1393 
       
  1394       __ addptr(qword_count, 4);  // add(8) and sub(4)
       
  1395       __ jccb(Assembler::less, L_end);
       
  1396       // Copy trailing 32 bytes
  1452       // Copy trailing 32 bytes
  1397       if (UseAVX >= 2) {
  1453       if (UseAVX >= 2) {
  1398         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
  1454         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
  1399         __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
  1455         __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
  1400       } else {
  1456       } else {