src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

branch:      datagramsocketimpl-branch
changeset:   58678:9cf78a70fa4f
parent:      54786:ebf733a324d4
child:       58679:9c3209ff7550
compares:    58677:13588c901957 vs. 58678:9cf78a70fa4f
@@ -1127,14 +1127,11 @@
     __ mov(r12, rsp);                               // remember rsp
     __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
     __ andptr(rsp, -16);                            // align stack as required by ABI
     BLOCK_COMMENT("call MacroAssembler::debug");
     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
-    __ mov(rsp, r12);                               // restore rsp
-    __ popa();                                      // pop registers (includes r12)
-    __ ret(4 * wordSize);                           // pop caller saved stuff
-
+    __ hlt();
     return start;
   }
 
   //
   // Verify that a register contains clean 32-bits positive value
@@ -1289,34 +1286,62 @@
     Label L_loop;
     __ align(OptoLoopAlignment);
     if (UseUnalignedLoadStores) {
       Label L_end;
       // Copy 64-bytes per iteration
-      __ BIND(L_loop);
       if (UseAVX > 2) {
+        Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold;
+
+        __ BIND(L_copy_bytes);
+        __ cmpptr(qword_count, (-1 * AVX3Threshold / 8));
+        __ jccb(Assembler::less, L_above_threshold);
+        __ jmpb(L_below_threshold);
+
+        __ bind(L_loop_avx512);
         __ evmovdqul(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit);
         __ evmovdqul(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit);
-      } else if (UseAVX == 2) {
+        __ bind(L_above_threshold);
+        __ addptr(qword_count, 8);
+        __ jcc(Assembler::lessEqual, L_loop_avx512);
+        __ jmpb(L_32_byte_head);
+
+        __ bind(L_loop_avx2);
         __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
         __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
+        __ bind(L_below_threshold);
+        __ addptr(qword_count, 8);
+        __ jcc(Assembler::lessEqual, L_loop_avx2);
+
+        __ bind(L_32_byte_head);
+        __ subptr(qword_count, 4);  // sub(8) and add(4)
+        __ jccb(Assembler::greater, L_end);
       } else {
-        __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
-        __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
-        __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
-        __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
-        __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
-        __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
-        __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
-        __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
+        __ BIND(L_loop);
+        if (UseAVX == 2) {
+          __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
+          __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
+          __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
+          __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
+        } else {
+          __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
+          __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
+          __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
+          __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
+          __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
+          __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
+          __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
+          __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
+        }
+
+        __ BIND(L_copy_bytes);
+        __ addptr(qword_count, 8);
+        __ jcc(Assembler::lessEqual, L_loop);
+        __ subptr(qword_count, 4);  // sub(8) and add(4)
+        __ jccb(Assembler::greater, L_end);
       }
-      __ BIND(L_copy_bytes);
-      __ addptr(qword_count, 8);
-      __ jcc(Assembler::lessEqual, L_loop);
-      __ subptr(qword_count, 4);  // sub(8) and add(4)
-      __ jccb(Assembler::greater, L_end);
       // Copy trailing 32 bytes
       if (UseAVX >= 2) {
         __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
       } else {
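For orientation: in the rewritten forward helper, the UseAVX > 2 path now dispatches once on AVX3Threshold rather than always using a fixed lane width. qword_count is negative here, counting up toward zero, so cmpptr(qword_count, -AVX3Threshold/8) with jccb(less, ...) routes copies with more than AVX3Threshold bytes remaining to the 64-byte evmovdqul loop, and everything else to the paired 32-byte vmovdqu loop; both fall into the shared "32-byte head" tail. A rough C++ model of that control flow (a sketch with illustrative names, not code from this file):

// Illustrative model of the dispatch the stub emits; qword_count is
// negative and counts up to zero, AVX3Threshold is in bytes.
static void copy_forward_dispatch(int64_t qword_count, int64_t avx3_threshold) {
  if (qword_count < -(avx3_threshold / 8)) {        // cmpptr + jccb(less)
    do {                                            // L_loop_avx512
      // 64-byte evmovdqul load/store at offset -56
      qword_count += 8;                             // addptr(qword_count, 8)
    } while (qword_count <= 0);                     // jcc(lessEqual)
  } else {
    do {                                            // L_loop_avx2
      // two 32-byte vmovdqu pairs at offsets -56 and -24
      qword_count += 8;
    } while (qword_count <= 0);
  }
  qword_count -= 4;                                 // "sub(8) and add(4)"
  if (qword_count <= 0) {                           // jccb(greater, L_end) skips this
    // copy trailing 32 bytes
  }
}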
@@ -1369,35 +1394,63 @@
     Label L_loop;
     __ align(OptoLoopAlignment);
     if (UseUnalignedLoadStores) {
       Label L_end;
       // Copy 64-bytes per iteration
-      __ BIND(L_loop);
       if (UseAVX > 2) {
+        Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold;
+
+        __ BIND(L_copy_bytes);
+        __ cmpptr(qword_count, (AVX3Threshold / 8));
+        __ jccb(Assembler::greater, L_above_threshold);
+        __ jmpb(L_below_threshold);
+
+        __ BIND(L_loop_avx512);
         __ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 0), Assembler::AVX_512bit);
         __ evmovdqul(Address(dest, qword_count, Address::times_8, 0), xmm0, Assembler::AVX_512bit);
-      } else if (UseAVX == 2) {
+        __ bind(L_above_threshold);
+        __ subptr(qword_count, 8);
+        __ jcc(Assembler::greaterEqual, L_loop_avx512);
+        __ jmpb(L_32_byte_head);
+
+        __ bind(L_loop_avx2);
         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
         __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
-        __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
-        __ vmovdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
+        __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
+        __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
+        __ bind(L_below_threshold);
+        __ subptr(qword_count, 8);
+        __ jcc(Assembler::greaterEqual, L_loop_avx2);
+
+        __ bind(L_32_byte_head);
+        __ addptr(qword_count, 4);  // add(8) and sub(4)
+        __ jccb(Assembler::less, L_end);
       } else {
-        __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
-        __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
-        __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
-        __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
-        __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
-        __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
-        __ movdqu(xmm3, Address(from, qword_count, Address::times_8,  0));
-        __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm3);
+        __ BIND(L_loop);
+        if (UseAVX == 2) {
+          __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
+          __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
+          __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
+          __ vmovdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
+        } else {
+          __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
+          __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
+          __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
+          __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
+          __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
+          __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
+          __ movdqu(xmm3, Address(from, qword_count, Address::times_8,  0));
+          __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm3);
+        }
+
+        __ BIND(L_copy_bytes);
+        __ subptr(qword_count, 8);
+        __ jcc(Assembler::greaterEqual, L_loop);
+
+        __ addptr(qword_count, 4);  // add(8) and sub(4)
+        __ jccb(Assembler::less, L_end);
       }
-      __ BIND(L_copy_bytes);
-      __ subptr(qword_count, 8);
-      __ jcc(Assembler::greaterEqual, L_loop);
-
-      __ addptr(qword_count, 4);  // add(8) and sub(4)
-      __ jccb(Assembler::less, L_end);
       // Copy trailing 32 bytes
       if (UseAVX >= 2) {
         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
         __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
       } else {
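The backward helper mirrors the forward sketch above with a positive qword_count stepping down to zero; schematically (again only an illustrative sketch, not code from this file):

// qword_count > 0, counting down; large residues take the 64-byte loop.
if (qword_count > avx3_threshold / 8) { /* L_loop_avx512: copy; qword_count -= 8; while (qword_count >= 0) */ }
else                                   { /* L_loop_avx2:   copy; qword_count -= 8; while (qword_count >= 0) */ }
qword_count += 4;   // "add(8) and sub(4)"; jccb(less, L_end) then guards the 32-byte tail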
@@ -1431,11 +1484,10 @@
     }
     __ addptr(qword_count, 4);
     __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
   }
 
-
   // Arguments:
   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
   //             ignored
   //   name    - stub name string
   //
@@ -1480,66 +1532,72 @@
     }
 
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(byte_count, count);
-    __ shrptr(count, 3); // count => qword_count
-
-    // Copy from low to high addresses.  Use 'to' as scratch.
-    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
-    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
-    __ negptr(qword_count); // make the count negative
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
-    __ increment(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(byte_count, 4);
-    __ jccb(Assembler::zero, L_copy_2_bytes);
-    __ movl(rax, Address(end_from, 8));
-    __ movl(Address(end_to, 8), rax);
-
-    __ addptr(end_from, 4);
-    __ addptr(end_to, 4);
-
-    // Check for and copy trailing word
-  __ BIND(L_copy_2_bytes);
-    __ testl(byte_count, 2);
-    __ jccb(Assembler::zero, L_copy_byte);
-    __ movw(rax, Address(end_from, 8));
-    __ movw(Address(end_to, 8), rax);
-
-    __ addptr(end_from, 2);
-    __ addptr(end_to, 2);
-
-    // Check for and copy trailing byte
-  __ BIND(L_copy_byte);
-    __ testl(byte_count, 1);
-    __ jccb(Assembler::zero, L_exit);
-    __ movb(rax, Address(end_from, 8));
-    __ movb(Address(end_to, 8), rax);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(byte_count, count);
+      __ shrptr(count, 3); // count => qword_count
+
+      // Copy from low to high addresses.  Use 'to' as scratch.
+      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+      __ negptr(qword_count); // make the count negative
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+      __ increment(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(byte_count, 4);
+      __ jccb(Assembler::zero, L_copy_2_bytes);
+      __ movl(rax, Address(end_from, 8));
+      __ movl(Address(end_to, 8), rax);
+
+      __ addptr(end_from, 4);
+      __ addptr(end_to, 4);
+
+      // Check for and copy trailing word
+    __ BIND(L_copy_2_bytes);
+      __ testl(byte_count, 2);
+      __ jccb(Assembler::zero, L_copy_byte);
+      __ movw(rax, Address(end_from, 8));
+      __ movw(Address(end_to, 8), rax);
+
+      __ addptr(end_from, 2);
+      __ addptr(end_to, 2);
+
+      // Check for and copy trailing byte
+    __ BIND(L_copy_byte);
+      __ testl(byte_count, 1);
+      __ jccb(Assembler::zero, L_exit);
+      __ movb(rax, Address(end_from, 8));
+      __ movb(Address(end_to, 8), rax);
+    }
   __ BIND(L_exit);
+    address ucme_exit_pc = __ pc();
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ vzeroupper();
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-    __ jmp(L_copy_4_bytes);
-
+    {
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
+      // Copy in multi-bytes chunks
+      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+      __ jmp(L_copy_4_bytes);
+    }
     return start;
   }
 
   // Arguments:
   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
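The new braces are not cosmetic: UnsafeCopyMemoryMark is an RAII helper that records the PC range of the copy code it encloses, so a page fault raised inside an Unsafe/NIO copy of mapped memory can be resumed instead of taking down the VM. Reading the two uses above: the second constructor argument (!aligned) gates whether an entry is recorded at all, the third chooses "continue at the end of this scope", and the optional fourth (ucme_exit_pc, captured right after L_exit) supplies an explicit continuation PC for the out-of-line chunked loop. A minimal sketch of that pattern, with hypothetical internals; only the constructor shape is taken from this diff:

// Hedged sketch of an RAII pc-range marker.  Field names and the accessor
// are hypothetical; the real class records into the table that
// StubGenerator_generate creates (see the last hunk of this change).
class UCMMarkSketch {
  StubCodeGenerator* _cgen;        // generator whose assembler emits the stub
  bool               _add_entry;   // e.g. !aligned: only page-crossing variants
  address            _start;       // pc captured at scope entry
  address            _continue_pc; // explicit exit pc, or NULL => scope end
 public:
  UCMMarkSketch(StubCodeGenerator* cgen, bool add_entry,
                bool continue_at_scope_end, address error_exit_pc = NULL)
    : _cgen(cgen), _add_entry(add_entry),
      _start(cgen->assembler()->pc()),
      _continue_pc(continue_at_scope_end ? NULL : error_exit_pc) {}
  ~UCMMarkSketch() {
    if (_add_entry) {
      address end = _cgen->assembler()->pc();
      // record [_start, end) plus (_continue_pc ? _continue_pc : end) in the
      // global table the SEGV handler consults before treating it as a crash
    }
  }
};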
@@ -1580,55 +1638,61 @@
 
     array_overlap_test(nooverlap_target, Address::times_1);
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(byte_count, count);
-    __ shrptr(count, 3);   // count => qword_count
-
-    // Copy from high to low addresses.
-
-    // Check for and copy trailing byte
-    __ testl(byte_count, 1);
-    __ jcc(Assembler::zero, L_copy_2_bytes);
-    __ movb(rax, Address(from, byte_count, Address::times_1, -1));
-    __ movb(Address(to, byte_count, Address::times_1, -1), rax);
-    __ decrement(byte_count); // Adjust for possible trailing word
-
-    // Check for and copy trailing word
-  __ BIND(L_copy_2_bytes);
-    __ testl(byte_count, 2);
-    __ jcc(Assembler::zero, L_copy_4_bytes);
-    __ movw(rax, Address(from, byte_count, Address::times_1, -2));
-    __ movw(Address(to, byte_count, Address::times_1, -2), rax);
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(byte_count, 4);
-    __ jcc(Assembler::zero, L_copy_bytes);
-    __ movl(rax, Address(from, qword_count, Address::times_8));
-    __ movl(Address(to, qword_count, Address::times_8), rax);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
-    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
-    __ decrement(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(byte_count, count);
+      __ shrptr(count, 3);   // count => qword_count
+
+      // Copy from high to low addresses.
+
+      // Check for and copy trailing byte
+      __ testl(byte_count, 1);
+      __ jcc(Assembler::zero, L_copy_2_bytes);
+      __ movb(rax, Address(from, byte_count, Address::times_1, -1));
+      __ movb(Address(to, byte_count, Address::times_1, -1), rax);
+      __ decrement(byte_count); // Adjust for possible trailing word
+
+      // Check for and copy trailing word
+    __ BIND(L_copy_2_bytes);
+      __ testl(byte_count, 2);
+      __ jcc(Assembler::zero, L_copy_4_bytes);
+      __ movw(rax, Address(from, byte_count, Address::times_1, -2));
+      __ movw(Address(to, byte_count, Address::times_1, -2), rax);
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(byte_count, 4);
+      __ jcc(Assembler::zero, L_copy_bytes);
+      __ movl(rax, Address(from, qword_count, Address::times_8));
+      __ movl(Address(to, qword_count, Address::times_8), rax);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+      __ decrement(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ vzeroupper();
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // Copy in multi-bytes chunks
+      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ vzeroupper();
     __ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -1682,58 +1746,65 @@
     }
 
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(word_count, count);
-    __ shrptr(count, 2); // count => qword_count
-
-    // Copy from low to high addresses.  Use 'to' as scratch.
-    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
-    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
-    __ negptr(qword_count);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
-    __ increment(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
-    // Original 'dest' is trashed, so we can't use it as a
-    // base register for a possible trailing word copy
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(word_count, 2);
-    __ jccb(Assembler::zero, L_copy_2_bytes);
-    __ movl(rax, Address(end_from, 8));
-    __ movl(Address(end_to, 8), rax);
-
-    __ addptr(end_from, 4);
-    __ addptr(end_to, 4);
-
-    // Check for and copy trailing word
-  __ BIND(L_copy_2_bytes);
-    __ testl(word_count, 1);
-    __ jccb(Assembler::zero, L_exit);
-    __ movw(rax, Address(end_from, 8));
-    __ movw(Address(end_to, 8), rax);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(word_count, count);
+      __ shrptr(count, 2); // count => qword_count
+
+      // Copy from low to high addresses.  Use 'to' as scratch.
+      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+      __ negptr(qword_count);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+      __ increment(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+
+      // Original 'dest' is trashed, so we can't use it as a
+      // base register for a possible trailing word copy
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(word_count, 2);
+      __ jccb(Assembler::zero, L_copy_2_bytes);
+      __ movl(rax, Address(end_from, 8));
+      __ movl(Address(end_to, 8), rax);
+
+      __ addptr(end_from, 4);
+      __ addptr(end_to, 4);
+
+      // Check for and copy trailing word
+    __ BIND(L_copy_2_bytes);
+      __ testl(word_count, 1);
+      __ jccb(Assembler::zero, L_exit);
+      __ movw(rax, Address(end_from, 8));
+      __ movw(Address(end_to, 8), rax);
+    }
   __ BIND(L_exit);
+    address ucme_exit_pc = __ pc();
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ vzeroupper();
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-    __ jmp(L_copy_4_bytes);
-
+    {
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
+      // Copy in multi-bytes chunks
+      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+      __ jmp(L_copy_4_bytes);
+    }
 
     return start;
   }
 
   address generate_fill(BasicType t, bool aligned, const char *name) {
@@ -1796,47 +1867,53 @@
 
     array_overlap_test(nooverlap_target, Address::times_2);
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(word_count, count);
-    __ shrptr(count, 2); // count => qword_count
-
-    // Copy from high to low addresses.  Use 'to' as scratch.
-
-    // Check for and copy trailing word
-    __ testl(word_count, 1);
-    __ jccb(Assembler::zero, L_copy_4_bytes);
-    __ movw(rax, Address(from, word_count, Address::times_2, -2));
-    __ movw(Address(to, word_count, Address::times_2, -2), rax);
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(word_count, 2);
-    __ jcc(Assembler::zero, L_copy_bytes);
-    __ movl(rax, Address(from, qword_count, Address::times_8));
-    __ movl(Address(to, qword_count, Address::times_8), rax);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
-    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
-    __ decrement(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(word_count, count);
+      __ shrptr(count, 2); // count => qword_count
+
+      // Copy from high to low addresses.  Use 'to' as scratch.
+
+      // Check for and copy trailing word
+      __ testl(word_count, 1);
+      __ jccb(Assembler::zero, L_copy_4_bytes);
+      __ movw(rax, Address(from, word_count, Address::times_2, -2));
+      __ movw(Address(to, word_count, Address::times_2, -2), rax);
+
+     // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(word_count, 2);
+      __ jcc(Assembler::zero, L_copy_bytes);
+      __ movl(rax, Address(from, qword_count, Address::times_8));
+      __ movl(Address(to, qword_count, Address::times_8), rax);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+      __ decrement(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ vzeroupper();
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // Copy in multi-bytes chunks
+      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ vzeroupper();
     __ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -1903,46 +1980,53 @@
 
     BasicType type = is_oop ? T_OBJECT : T_INT;
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(dword_count, count);
-    __ shrptr(count, 1); // count => qword_count
-
-    // Copy from low to high addresses.  Use 'to' as scratch.
-    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
-    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
-    __ negptr(qword_count);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
-    __ increment(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
-    __ jccb(Assembler::zero, L_exit);
-    __ movl(rax, Address(end_from, 8));
-    __ movl(Address(end_to, 8), rax);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(dword_count, count);
+      __ shrptr(count, 1); // count => qword_count
+
+      // Copy from low to high addresses.  Use 'to' as scratch.
+      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+      __ negptr(qword_count);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+      __ increment(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
+      __ jccb(Assembler::zero, L_exit);
+      __ movl(rax, Address(end_from, 8));
+      __ movl(Address(end_to, 8), rax);
+    }
   __ BIND(L_exit);
+    address ucme_exit_pc = __ pc();
     bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
     restore_arg_regs_using_thread();
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
     __ vzeroupper();
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-    __ jmp(L_copy_4_bytes);
-
+    {
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, false, ucme_exit_pc);
+      // Copy in multi-bytes chunks
+      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+      __ jmp(L_copy_4_bytes);
+    }
 
     return start;
   }
 
   // Arguments:
@@ -1999,42 +2083,49 @@
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     // no registers are destroyed by this call
     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
 
     assert_clean_int(count, rax); // Make sure 'count' is clean int.
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(dword_count, count);
-    __ shrptr(count, 1); // count => qword_count
-
-    // Copy from high to low addresses.  Use 'to' as scratch.
-
-    // Check for and copy trailing dword
-    __ testl(dword_count, 1);
-    __ jcc(Assembler::zero, L_copy_bytes);
-    __ movl(rax, Address(from, dword_count, Address::times_4, -4));
-    __ movl(Address(to, dword_count, Address::times_4, -4), rax);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
-    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
-    __ decrement(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(dword_count, count);
+      __ shrptr(count, 1); // count => qword_count
+
+      // Copy from high to low addresses.  Use 'to' as scratch.
+
+      // Check for and copy trailing dword
+      __ testl(dword_count, 1);
+      __ jcc(Assembler::zero, L_copy_bytes);
+      __ movl(rax, Address(from, dword_count, Address::times_4, -4));
+      __ movl(Address(to, dword_count, Address::times_4, -4), rax);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+      __ decrement(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     if (is_oop) {
       __ jmp(L_exit);
     }
     restore_arg_regs_using_thread();
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
     __ vzeroupper();
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+      // Copy in multi-bytes chunks
+      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
 
   __ BIND(L_exit);
     bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
     restore_arg_regs_using_thread();
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
@@ -2100,37 +2191,44 @@
     }
 
     BasicType type = is_oop ? T_OBJECT : T_LONG;
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
-
-    // Copy from low to high addresses.  Use 'to' as scratch.
-    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
-    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
-    __ negptr(qword_count);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
-    __ increment(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+
+      // Copy from low to high addresses.  Use 'to' as scratch.
+      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+      __ negptr(qword_count);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+      __ increment(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     if (is_oop) {
       __ jmp(L_exit);
     } else {
       restore_arg_regs_using_thread();
       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
       __ xorptr(rax, rax); // return 0
       __ vzeroupper();
       __ leave(); // required for proper stackwalking of RuntimeStub frame
       __ ret(0);
     }
 
-    // Copy in multi-bytes chunks
-    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+      // Copy in multi-bytes chunks
+      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
 
     __ BIND(L_exit);
     bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
     restore_arg_regs_using_thread();
     if (is_oop) {
@@ -2193,34 +2291,40 @@
     }
 
     BasicType type = is_oop ? T_OBJECT : T_LONG;
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
-
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
-    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
-    __ decrement(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+      __ decrement(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     if (is_oop) {
       __ jmp(L_exit);
     } else {
       restore_arg_regs_using_thread();
       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
       __ xorptr(rax, rax); // return 0
       __ vzeroupper();
       __ leave(); // required for proper stackwalking of RuntimeStub frame
       __ ret(0);
     }
-
-    // Copy in multi-bytes chunks
-    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+
+      // Copy in multi-bytes chunks
+      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
     __ BIND(L_exit);
     bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
     restore_arg_regs_using_thread();
     if (is_oop) {
       inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
@@ -2851,10 +2955,49 @@
 
   __ BIND(L_failed);
     __ xorptr(rax, rax);
     __ notptr(rax); // return -1
     __ leave();   // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+  address generate_data_cache_writeback() {
+    const Register src        = c_rarg0;  // source address
+
+    __ align(CodeEntryAlignment);
+
+    StubCodeMark mark(this, "StubRoutines", "_data_cache_writeback");
+
+    address start = __ pc();
+    __ enter();
+    __ cache_wb(Address(src, 0));
+    __ leave();
+    __ ret(0);
+
+    return start;
+  }
+
+  address generate_data_cache_writeback_sync() {
+    const Register is_pre    = c_rarg0;  // pre or post sync
+
+    __ align(CodeEntryAlignment);
+
+    StubCodeMark mark(this, "StubRoutines", "_data_cache_writeback_sync");
+
+    // pre wbsync is a no-op
+    // post wbsync translates to an sfence
+
+    Label skip;
+    address start = __ pc();
+    __ enter();
+    __ cmpl(is_pre, 0);
+    __ jcc(Assembler::notEqual, skip);
+    __ cache_wbsync(false);
+    __ bind(skip);
+    __ leave();
     __ ret(0);
 
     return start;
   }
 
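Taken together, the two new stubs implement cache-line writeback for mapped memory: generate_data_cache_writeback flushes the single line addressed by c_rarg0 via cache_wb, and generate_data_cache_writeback_sync, per its own comments, does nothing for a pre-sync and emits an sfence for a post-sync. That implies a caller protocol of roughly the following shape (a hedged sketch; the wrapper and typedef names are hypothetical, and the per-line instruction cache_wb emits presumably depends on CPU support):

// Hypothetical C++ view of how the two stub entry points compose.
typedef void (*writeback_fn)(void* line);        // flush one cache line
typedef void (*writeback_sync_fn)(int is_pre);   // nonzero = pre, zero = post

static void force_region(char* base, size_t len, size_t line_size,
                         writeback_fn wb, writeback_sync_fn wbsync) {
  wbsync(1);                                   // pre-sync: a no-op on x86
  for (char* p = base; p < base + len; p += line_size) {
    wb(p);                                     // write one line back to memory
  }
  wbsync(0);                                   // post-sync: sfence on x86
}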
@@ -3631,10 +3774,40 @@
 #endif
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
     return start;
 }
+
+  address generate_electronicCodeBook_encryptAESCrypt() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "electronicCodeBook_encryptAESCrypt");
+    address start = __ pc();
+    const Register from = c_rarg0;  // source array address
+    const Register to = c_rarg1;  // destination array address
+    const Register key = c_rarg2;  // key array address
+    const Register len = c_rarg3;  // src len (must be multiple of blocksize 16)
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ aesecb_encrypt(from, to, key, len);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+    return start;
+ }
+
+  address generate_electronicCodeBook_decryptAESCrypt() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "electronicCodeBook_decryptAESCrypt");
+    address start = __ pc();
+    const Register from = c_rarg0;  // source array address
+    const Register to = c_rarg1;  // destination array address
+    const Register key = c_rarg2;  // key array address
+    const Register len = c_rarg3;  // src len (must be multiple of blocksize 16)
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ aesecb_decrypt(from, to, key, len);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+    return start;
+  }
 
   address generate_upper_word_mask() {
     __ align(64);
     StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
     address start = __ pc();
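The two ECB stubs are twins, differing only in delegating to aesecb_encrypt or aesecb_decrypt; the register comments pin down their C-level calling convention, roughly (a hypothetical typedef for illustration, not in the source):

// from = c_rarg0, to = c_rarg1, key = c_rarg2, len = c_rarg3;
// len must be a multiple of the 16-byte AES block size.
typedef void (*ecb_crypt_stub)(const void* from, void* to, const void* key, int len);

As the registration hunk further down shows, these entry points are only installed when VM_Version reports VAES, AVX512VL and AVX512DQ support.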
@@ -5915,21 +6088,27 @@
     StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000);
 
     // support for verify_oop (must happen after universe_init)
     StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
 
+    // data cache line writeback
+    StubRoutines::_data_cache_writeback = generate_data_cache_writeback();
+    StubRoutines::_data_cache_writeback_sync = generate_data_cache_writeback_sync();
+
     // arraycopy stubs used by compilers
     generate_arraycopy_stubs();
 
     // don't bother generating these AES intrinsic stubs unless global flag is set
     if (UseAESIntrinsics) {
       StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // needed by the others
       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
       if (VM_Version::supports_vaes() &&  VM_Version::supports_avx512vl() && VM_Version::supports_avx512dq() ) {
         StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptVectorAESCrypt();
+        StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt();
+        StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt();
       } else {
         StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
       }
     }
     if (UseAESCTRIntrinsics){
@@ -6034,8 +6213,12 @@
       generate_initial();
     }
   }
 }; // end class declaration
 
+#define UCM_TABLE_MAX_ENTRIES 16
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }
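Creating the table lazily with UCM_TABLE_MAX_ENTRIES = 16 bounds the number of marked copy regions across all generated stubs. The point of the table is a PC lookup at fault time, plausibly shaped like the sketch below (the helper names are assumptions, not shown in this changeset; in HotSpot the check would sit in the platform signal handlers, not in this file):

// Hedged sketch: map a faulting pc inside a marked copy region to the
// continuation pc recorded by UnsafeCopyMemoryMark, or NULL if unrelated.
static address continuation_for_copy_fault(address faulting_pc) {
  if (UnsafeCopyMemory::contains_pc(faulting_pc)) {                // assumed helper
    return UnsafeCopyMemory::page_error_continue_pc(faulting_pc);  // assumed helper
  }
  return NULL;  // not in a marked region: normal crash handling applies
}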