hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
changeset 360 21d113ecbf6a
parent 189 4248c8e21063
child 371 1aacedc9db7c
equal deleted inserted replaced
357:f4edb0d9f109 360:21d113ecbf6a
    28 // Declaration and definition of StubGenerator (no .hpp file).
    28 // Declaration and definition of StubGenerator (no .hpp file).
    29 // For a more detailed description of the stub routine structure
    29 // For a more detailed description of the stub routine structure
    30 // see the comment in stubRoutines.hpp
    30 // see the comment in stubRoutines.hpp
    31 
    31 
    32 #define __ _masm->
    32 #define __ _masm->
       
    33 #define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
    33 
    34 
    34 #ifdef PRODUCT
    35 #ifdef PRODUCT
    35 #define BLOCK_COMMENT(str) /* nothing */
    36 #define BLOCK_COMMENT(str) /* nothing */
    36 #else
    37 #else
    37 #define BLOCK_COMMENT(str) __ block_comment(str)
    38 #define BLOCK_COMMENT(str) __ block_comment(str)
   250     }
   251     }
   251 #endif
   252 #endif
   252 
   253 
   253     // Load up thread register
   254     // Load up thread register
   254     __ movq(r15_thread, thread);
   255     __ movq(r15_thread, thread);
       
   256     __ reinit_heapbase();
   255 
   257 
   256 #ifdef ASSERT
   258 #ifdef ASSERT
   257     // make sure we have no pending exceptions
   259     // make sure we have no pending exceptions
   258     {
   260     {
   259       Label L;
   261       Label L;
   943     __ movptr(c_rarg3, (int64_t) Universe::verify_oop_bits());
   945     __ movptr(c_rarg3, (int64_t) Universe::verify_oop_bits());
   944     __ cmpq(c_rarg2, c_rarg3);
   946     __ cmpq(c_rarg2, c_rarg3);
   945     __ jcc(Assembler::notZero, error);
   947     __ jcc(Assembler::notZero, error);
   946 
   948 
   947     // make sure klass is 'reasonable'
   949     // make sure klass is 'reasonable'
   948     __ movq(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
   950     __ load_klass(rax, rax);  // get klass
   949     __ testq(rax, rax);
   951     __ testq(rax, rax);
   950     __ jcc(Assembler::zero, error); // if klass is NULL it is broken
   952     __ jcc(Assembler::zero, error); // if klass is NULL it is broken
   951     // Check if the klass is in the right area of memory
   953     // Check if the klass is in the right area of memory
   952     __ movq(c_rarg2, rax);
   954     __ movq(c_rarg2, rax);
   953     __ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask());
   955     __ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask());
   955     __ movptr(c_rarg3, (int64_t) Universe::verify_klass_bits());
   957     __ movptr(c_rarg3, (int64_t) Universe::verify_klass_bits());
   956     __ cmpq(c_rarg2, c_rarg3);
   958     __ cmpq(c_rarg2, c_rarg3);
   957     __ jcc(Assembler::notZero, error);
   959     __ jcc(Assembler::notZero, error);
   958 
   960 
   959     // make sure klass' klass is 'reasonable'
   961     // make sure klass' klass is 'reasonable'
   960     __ movq(rax, Address(rax, oopDesc::klass_offset_in_bytes()));
   962     __ load_klass(rax, rax);
   961     __ testq(rax, rax);
   963     __ testq(rax, rax);
   962     __ jcc(Assembler::zero, error); // if klass' klass is NULL it is broken
   964     __ jcc(Assembler::zero, error); // if klass' klass is NULL it is broken
   963     // Check if the klass' klass is in the right area of memory
   965     // Check if the klass' klass is in the right area of memory
   964     __ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask());
   966     __ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask());
   965     __ andq(rax, c_rarg3);
   967     __ andq(rax, c_rarg3);
   999     __ subq(rsp, frame::arg_reg_save_area_bytes);// windows
  1001     __ subq(rsp, frame::arg_reg_save_area_bytes);// windows
  1000     __ andq(rsp, -16);                           // align stack as required by ABI
  1002     __ andq(rsp, -16);                           // align stack as required by ABI
  1001     BLOCK_COMMENT("call MacroAssembler::debug");
  1003     BLOCK_COMMENT("call MacroAssembler::debug");
  1002     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug)));
  1004     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug)));
  1003     __ movq(rsp, r12);                           // restore rsp
  1005     __ movq(rsp, r12);                           // restore rsp
       
  1006     __ reinit_heapbase();                        // r12 is heapbase
  1004     __ popaq();                                  // pop registers
  1007     __ popaq();                                  // pop registers
  1005     __ ret(3 * wordSize);                        // pop caller saved stuff
  1008     __ ret(3 * wordSize);                        // pop caller saved stuff
  1006 
  1009 
  1007     return start;
  1010     return start;
  1008   }
  1011   }
  1650   }
  1653   }
  1651 
  1654 
  1652   // Arguments:
  1655   // Arguments:
  1653   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  1656   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  1654   //             ignored
  1657   //             ignored
       
  1658   //   is_oop  - true => oop array, so generate store check code
  1655   //   name    - stub name string
  1659   //   name    - stub name string
  1656   //
  1660   //
  1657   // Inputs:
  1661   // Inputs:
  1658   //   c_rarg0   - source array address
  1662   //   c_rarg0   - source array address
  1659   //   c_rarg1   - destination array address
  1663   //   c_rarg1   - destination array address
  1663   // the hardware handle it.  The two dwords within qwords that span
  1667   // the hardware handle it.  The two dwords within qwords that span
   1664   // cache line boundaries will still be loaded and stored atomically.
   1668   // cache line boundaries will still be loaded and stored atomically.
  1665   //
  1669   //
  1666   // Side Effects:
  1670   // Side Effects:
  1667   //   disjoint_int_copy_entry is set to the no-overlap entry point
  1671   //   disjoint_int_copy_entry is set to the no-overlap entry point
  1668   //   used by generate_conjoint_int_copy().
  1672   //   used by generate_conjoint_int_oop_copy().
  1669   //
  1673   //
  1670   address generate_disjoint_int_copy(bool aligned, const char *name) {
  1674   address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
  1671     __ align(CodeEntryAlignment);
  1675     __ align(CodeEntryAlignment);
  1672     StubCodeMark mark(this, "StubRoutines", name);
  1676     StubCodeMark mark(this, "StubRoutines", name);
  1673     address start = __ pc();
  1677     address start = __ pc();
  1674 
  1678 
  1675     Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
  1679     Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
  1678     const Register count       = rdx;  // elements count
  1682     const Register count       = rdx;  // elements count
  1679     const Register dword_count = rcx;
  1683     const Register dword_count = rcx;
  1680     const Register qword_count = count;
  1684     const Register qword_count = count;
  1681     const Register end_from    = from; // source array end address
  1685     const Register end_from    = from; // source array end address
  1682     const Register end_to      = to;   // destination array end address
  1686     const Register end_to      = to;   // destination array end address
       
  1687     const Register saved_to    = r11;  // saved destination array address
  1683     // End pointers are inclusive, and if count is not zero they point
  1688     // End pointers are inclusive, and if count is not zero they point
  1684     // to the last unit copied:  end_to[0] := end_from[0]
  1689     // to the last unit copied:  end_to[0] := end_from[0]
  1685 
  1690 
  1686     __ enter(); // required for proper stackwalking of RuntimeStub frame
  1691     __ enter(); // required for proper stackwalking of RuntimeStub frame
  1687     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  1692     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  1688 
  1693 
  1689     disjoint_int_copy_entry = __ pc();
  1694     (is_oop ? disjoint_oop_copy_entry : disjoint_int_copy_entry) = __ pc();
       
  1695 
       
  1696     if (is_oop) {
       
  1697       // no registers are destroyed by this call
       
  1698       gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
       
  1699     }
       
  1700 
  1690     BLOCK_COMMENT("Entry:");
  1701     BLOCK_COMMENT("Entry:");
  1691     // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  1702     // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  1692 
  1703 
  1693     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  1704     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  1694                       // r9 and r10 may be used to save non-volatile registers
  1705                       // r9 and r10 may be used to save non-volatile registers
       
  1706 
       
  1707     if (is_oop) {
       
  1708       __ movq(saved_to, to);
       
  1709     }
  1695 
  1710 
  1696     // 'from', 'to' and 'count' are now valid
  1711     // 'from', 'to' and 'count' are now valid
  1697     __ movq(dword_count, count);
  1712     __ movq(dword_count, count);
  1698     __ shrq(count, 1); // count => qword_count
  1713     __ shrq(count, 1); // count => qword_count
  1699 
  1714 
  1716     __ jccb(Assembler::zero, L_exit);
  1731     __ jccb(Assembler::zero, L_exit);
  1717     __ movl(rax, Address(end_from, 8));
  1732     __ movl(rax, Address(end_from, 8));
  1718     __ movl(Address(end_to, 8), rax);
  1733     __ movl(Address(end_to, 8), rax);
  1719 
  1734 
  1720   __ BIND(L_exit);
  1735   __ BIND(L_exit);
       
  1736     if (is_oop) {
       
  1737       __ leaq(end_to, Address(saved_to, dword_count, Address::times_4, -4));
       
  1738       gen_write_ref_array_post_barrier(saved_to, end_to, rax);
       
  1739     }
  1721     inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
  1740     inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
  1722     restore_arg_regs();
  1741     restore_arg_regs();
  1723     __ xorq(rax, rax); // return 0
  1742     __ xorq(rax, rax); // return 0
  1724     __ leave(); // required for proper stackwalking of RuntimeStub frame
  1743     __ leave(); // required for proper stackwalking of RuntimeStub frame
  1725     __ ret(0);
  1744     __ ret(0);
  1732   }
  1751   }
  1733 
  1752 
  1734   // Arguments:
  1753   // Arguments:
  1735   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  1754   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  1736   //             ignored
  1755   //             ignored
       
  1756   //   is_oop  - true => oop array, so generate store check code
  1737   //   name    - stub name string
  1757   //   name    - stub name string
  1738   //
  1758   //
  1739   // Inputs:
  1759   // Inputs:
  1740   //   c_rarg0   - source array address
  1760   //   c_rarg0   - source array address
  1741   //   c_rarg1   - destination array address
  1761   //   c_rarg1   - destination array address
  1743   //
  1763   //
  1744   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  1764   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  1745   // the hardware handle it.  The two dwords within qwords that span
  1765   // the hardware handle it.  The two dwords within qwords that span
   1746   // cache line boundaries will still be loaded and stored atomically.
   1766   // cache line boundaries will still be loaded and stored atomically.
  1747   //
  1767   //
  1748   address generate_conjoint_int_copy(bool aligned, const char *name) {
  1768   address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
  1749     __ align(CodeEntryAlignment);
  1769     __ align(CodeEntryAlignment);
  1750     StubCodeMark mark(this, "StubRoutines", name);
  1770     StubCodeMark mark(this, "StubRoutines", name);
  1751     address start = __ pc();
  1771     address start = __ pc();
  1752 
  1772 
  1753     Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes;
  1773     Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
  1754     const Register from        = rdi;  // source array address
  1774     const Register from        = rdi;  // source array address
  1755     const Register to          = rsi;  // destination array address
  1775     const Register to          = rsi;  // destination array address
  1756     const Register count       = rdx;  // elements count
  1776     const Register count       = rdx;  // elements count
  1757     const Register dword_count = rcx;
  1777     const Register dword_count = rcx;
  1758     const Register qword_count = count;
  1778     const Register qword_count = count;
  1759 
  1779 
  1760     __ enter(); // required for proper stackwalking of RuntimeStub frame
  1780     __ enter(); // required for proper stackwalking of RuntimeStub frame
  1761     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  1781     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  1762 
  1782 
  1763     int_copy_entry = __ pc();
  1783     if (is_oop) {
       
  1784       // no registers are destroyed by this call
       
  1785       gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
       
  1786     }
       
  1787 
       
  1788     (is_oop ? oop_copy_entry : int_copy_entry) = __ pc();
  1764     BLOCK_COMMENT("Entry:");
  1789     BLOCK_COMMENT("Entry:");
  1765     // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  1790     // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  1766 
  1791 
  1767     array_overlap_test(disjoint_int_copy_entry, Address::times_4);
  1792     array_overlap_test(is_oop ? disjoint_oop_copy_entry : disjoint_int_copy_entry,
       
  1793                        Address::times_4);
  1768     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  1794     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  1769                       // r9 and r10 may be used to save non-volatile registers
  1795                       // r9 and r10 may be used to save non-volatile registers
  1770 
  1796 
       
  1797     assert_clean_int(count, rax); // Make sure 'count' is clean int.
  1771     // 'from', 'to' and 'count' are now valid
  1798     // 'from', 'to' and 'count' are now valid
  1772     __ movq(dword_count, count);
  1799     __ movq(dword_count, count);
  1773     __ shrq(count, 1); // count => qword_count
  1800     __ shrq(count, 1); // count => qword_count
  1774 
  1801 
  1775     // Copy from high to low addresses.  Use 'to' as scratch.
  1802     // Copy from high to low addresses.  Use 'to' as scratch.
  1787     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
  1814     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
  1788     __ decrementq(qword_count);
  1815     __ decrementq(qword_count);
  1789     __ jcc(Assembler::notZero, L_copy_8_bytes);
  1816     __ jcc(Assembler::notZero, L_copy_8_bytes);
  1790 
  1817 
  1791     inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
  1818     inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
       
  1819     if (is_oop) {
       
  1820       __ jmp(L_exit);
       
  1821     }
  1792     restore_arg_regs();
  1822     restore_arg_regs();
  1793     __ xorq(rax, rax); // return 0
  1823     __ xorq(rax, rax); // return 0
  1794     __ leave(); // required for proper stackwalking of RuntimeStub frame
  1824     __ leave(); // required for proper stackwalking of RuntimeStub frame
  1795     __ ret(0);
  1825     __ ret(0);
  1796 
  1826 
  1797     // Copy in 32-bytes chunks
  1827     // Copy in 32-bytes chunks
  1798     copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
  1828     copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
  1799 
  1829 
  1800     inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
  1830    inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
       
  1831    __ bind(L_exit);
       
  1832      if (is_oop) {
       
  1833        Register end_to = rdx;
       
  1834        __ leaq(end_to, Address(to, dword_count, Address::times_4, -4));
       
  1835        gen_write_ref_array_post_barrier(to, end_to, rax);
       
  1836      }
  1801     restore_arg_regs();
  1837     restore_arg_regs();
  1802     __ xorq(rax, rax); // return 0
  1838     __ xorq(rax, rax); // return 0
  1803     __ leave(); // required for proper stackwalking of RuntimeStub frame
  1839     __ leave(); // required for proper stackwalking of RuntimeStub frame
  1804     __ ret(0);
  1840     __ ret(0);
  1805 
  1841 
  1815   // Inputs:
  1851   // Inputs:
  1816   //   c_rarg0   - source array address
  1852   //   c_rarg0   - source array address
  1817   //   c_rarg1   - destination array address
  1853   //   c_rarg1   - destination array address
  1818   //   c_rarg2   - element count, treated as ssize_t, can be zero
  1854   //   c_rarg2   - element count, treated as ssize_t, can be zero
  1819   //
  1855   //
  1820   // Side Effects:
  1856  // Side Effects:
  1821   //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
  1857   //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
  1822   //   no-overlap entry point used by generate_conjoint_long_oop_copy().
  1858   //   no-overlap entry point used by generate_conjoint_long_oop_copy().
  1823   //
  1859   //
  1824   address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
  1860   address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
  1825     __ align(CodeEntryAlignment);
  1861     __ align(CodeEntryAlignment);
  1855 
  1891 
  1856     // 'from', 'to' and 'qword_count' are now valid
  1892     // 'from', 'to' and 'qword_count' are now valid
  1857 
  1893 
  1858     // Copy from low to high addresses.  Use 'to' as scratch.
  1894     // Copy from low to high addresses.  Use 'to' as scratch.
  1859     __ leaq(end_from, Address(from, qword_count, Address::times_8, -8));
  1895     __ leaq(end_from, Address(from, qword_count, Address::times_8, -8));
  1860     __ leaq(end_to,   Address(to, qword_count, Address::times_8, -8));
  1896     __ leaq(end_to,   Address(to,   qword_count, Address::times_8, -8));
  1861     __ negq(qword_count);
  1897     __ negq(qword_count);
  1862     __ jmp(L_copy_32_bytes);
  1898     __ jmp(L_copy_32_bytes);
  1863 
  1899 
  1864     // Copy trailing qwords
  1900     // Copy trailing qwords
  1865   __ BIND(L_copy_8_bytes);
  1901   __ BIND(L_copy_8_bytes);
  1921     __ enter(); // required for proper stackwalking of RuntimeStub frame
  1957     __ enter(); // required for proper stackwalking of RuntimeStub frame
  1922     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  1958     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  1923 
  1959 
  1924     address disjoint_copy_entry = NULL;
  1960     address disjoint_copy_entry = NULL;
  1925     if (is_oop) {
  1961     if (is_oop) {
       
  1962       assert(!UseCompressedOops, "shouldn't be called for compressed oops");
  1926       disjoint_copy_entry = disjoint_oop_copy_entry;
  1963       disjoint_copy_entry = disjoint_oop_copy_entry;
  1927       oop_copy_entry  = __ pc();
  1964       oop_copy_entry  = __ pc();
       
  1965       array_overlap_test(disjoint_oop_copy_entry, Address::times_8);
  1928     } else {
  1966     } else {
  1929       disjoint_copy_entry = disjoint_long_copy_entry;
  1967       disjoint_copy_entry = disjoint_long_copy_entry;
  1930       long_copy_entry = __ pc();
  1968       long_copy_entry = __ pc();
       
  1969       array_overlap_test(disjoint_long_copy_entry, Address::times_8);
  1931     }
  1970     }
  1932     BLOCK_COMMENT("Entry:");
  1971     BLOCK_COMMENT("Entry:");
  1933     // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  1972     // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  1934 
  1973 
  1935     array_overlap_test(disjoint_copy_entry, Address::times_8);
  1974     array_overlap_test(disjoint_copy_entry, Address::times_8);
  1942       // Save to and count for store barrier
  1981       // Save to and count for store barrier
  1943       __ movq(saved_count, qword_count);
  1982       __ movq(saved_count, qword_count);
  1944       // No registers are destroyed by this call
  1983       // No registers are destroyed by this call
  1945       gen_write_ref_array_pre_barrier(to, saved_count);
  1984       gen_write_ref_array_pre_barrier(to, saved_count);
  1946     }
  1985     }
  1947 
       
  1948     // Copy from high to low addresses.  Use rcx as scratch.
       
  1949 
  1986 
  1950     __ jmp(L_copy_32_bytes);
  1987     __ jmp(L_copy_32_bytes);
  1951 
  1988 
  1952     // Copy trailing qwords
  1989     // Copy trailing qwords
  1953   __ BIND(L_copy_8_bytes);
  1990   __ BIND(L_copy_8_bytes);
  2036       // Skip to start of data.
  2073       // Skip to start of data.
  2037       __ addq(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
  2074       __ addq(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   2038       // Scan rcx words at [rdi] for occurrence of rax
   2075       // Scan rcx words at [rdi] for occurrence of rax
  2039       // Set NZ/Z based on last compare
  2076       // Set NZ/Z based on last compare
  2040       __ movq(rax, super_klass);
  2077       __ movq(rax, super_klass);
  2041       __ repne_scan();
  2078       if (UseCompressedOops) {
       
  2079         // Compare against compressed form.  Don't need to uncompress because
       
  2080         // looks like orig rax is restored in popq below.
       
  2081         __ encode_heap_oop(rax);
       
  2082         __ repne_scanl();
       
  2083       } else {
       
  2084          __ repne_scanq();
       
  2085       }
  2042 
  2086 
  2043       // Unspill the temp. registers:
  2087       // Unspill the temp. registers:
  2044       __ popq(rdi);
  2088       __ popq(rdi);
  2045       __ popq(rcx);
  2089       __ popq(rcx);
  2046       __ popq(rax);
  2090       __ popq(rax);
  2113 
  2157 
  2114 #ifdef ASSERT
  2158 #ifdef ASSERT
  2115     // caller guarantees that the arrays really are different
  2159     // caller guarantees that the arrays really are different
  2116     // otherwise, we would have to make conjoint checks
  2160     // otherwise, we would have to make conjoint checks
  2117     { Label L;
  2161     { Label L;
  2118       array_overlap_test(L, Address::times_8);
  2162       array_overlap_test(L, TIMES_OOP);
  2119       __ stop("checkcast_copy within a single array");
  2163       __ stop("checkcast_copy within a single array");
  2120       __ bind(L);
  2164       __ bind(L);
  2121     }
  2165     }
  2122 #endif //ASSERT
  2166 #endif //ASSERT
  2123 
  2167 
  2158       __ bind(L);
  2202       __ bind(L);
  2159     }
  2203     }
  2160 #endif //ASSERT
  2204 #endif //ASSERT
  2161 
  2205 
  2162     // Loop-invariant addresses.  They are exclusive end pointers.
  2206     // Loop-invariant addresses.  They are exclusive end pointers.
  2163     Address end_from_addr(from, length, Address::times_8, 0);
  2207     Address end_from_addr(from, length, TIMES_OOP, 0);
  2164     Address   end_to_addr(to,   length, Address::times_8, 0);
  2208     Address   end_to_addr(to,   length, TIMES_OOP, 0);
  2165     // Loop-variant addresses.  They assume post-incremented count < 0.
  2209     // Loop-variant addresses.  They assume post-incremented count < 0.
  2166     Address from_element_addr(end_from, count, Address::times_8, 0);
  2210     Address from_element_addr(end_from, count, TIMES_OOP, 0);
  2167     Address   to_element_addr(end_to,   count, Address::times_8, 0);
  2211     Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
  2168     Address oop_klass_addr(rax_oop, oopDesc::klass_offset_in_bytes());
       
  2169 
  2212 
  2170     gen_write_ref_array_pre_barrier(to, count);
  2213     gen_write_ref_array_pre_barrier(to, count);
  2171 
  2214 
  2172     // Copy from low to high addresses, indexed from the end of each array.
  2215     // Copy from low to high addresses, indexed from the end of each array.
  2173     __ leaq(end_from, end_from_addr);
  2216     __ leaq(end_from, end_from_addr);
  2187     //   for (count = -count; count != 0; count++)
  2230     //   for (count = -count; count != 0; count++)
  2188     // Base pointers src, dst are biased by 8*(count-1),to last element.
  2231     // Base pointers src, dst are biased by 8*(count-1),to last element.
  2189     __ align(16);
  2232     __ align(16);
  2190 
  2233 
  2191     __ BIND(L_store_element);
  2234     __ BIND(L_store_element);
  2192     __ movq(to_element_addr, rax_oop);  // store the oop
  2235     __ store_heap_oop(to_element_addr, rax_oop);  // store the oop
  2193     __ incrementq(count);               // increment the count toward zero
  2236     __ incrementq(count);               // increment the count toward zero
  2194     __ jcc(Assembler::zero, L_do_card_marks);
  2237     __ jcc(Assembler::zero, L_do_card_marks);
  2195 
  2238 
  2196     // ======== loop entry is here ========
  2239     // ======== loop entry is here ========
  2197     __ BIND(L_load_element);
  2240     __ BIND(L_load_element);
  2198     __ movq(rax_oop, from_element_addr); // load the oop
  2241     __ load_heap_oop(rax_oop, from_element_addr); // load the oop
  2199     __ testq(rax_oop, rax_oop);
  2242     __ testq(rax_oop, rax_oop);
  2200     __ jcc(Assembler::zero, L_store_element);
  2243     __ jcc(Assembler::zero, L_store_element);
  2201 
  2244 
  2202     __ movq(r11_klass, oop_klass_addr); // query the object klass
  2245     __ load_klass(r11_klass, rax_oop);// query the object klass
  2203     generate_type_check(r11_klass, ckoff, ckval, L_store_element);
  2246     generate_type_check(r11_klass, ckoff, ckval, L_store_element);
  2204     // ======== end loop ========
  2247     // ======== end loop ========
  2205 
  2248 
  2206     // It was a real error; we must depend on the caller to finish the job.
  2249     // It was a real error; we must depend on the caller to finish the job.
  2207     // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
  2250     // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
  2423     guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
  2466     guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
  2424 
  2467 
  2425     // registers used as temp
  2468     // registers used as temp
  2426     const Register r11_length    = r11; // elements count to copy
  2469     const Register r11_length    = r11; // elements count to copy
  2427     const Register r10_src_klass = r10; // array klass
  2470     const Register r10_src_klass = r10; // array klass
       
  2471     const Register r9_dst_klass  = r9;  // dest array klass
  2428 
  2472 
  2429     //  if (length < 0) return -1;
  2473     //  if (length < 0) return -1;
  2430     __ movl(r11_length, C_RARG4);       // length (elements count, 32-bits value)
  2474     __ movl(r11_length, C_RARG4);       // length (elements count, 32-bits value)
  2431     __ testl(r11_length, r11_length);
  2475     __ testl(r11_length, r11_length);
  2432     __ jccb(Assembler::negative, L_failed_0);
  2476     __ jccb(Assembler::negative, L_failed_0);
  2433 
  2477 
  2434     Address src_klass_addr(src, oopDesc::klass_offset_in_bytes());
  2478     __ load_klass(r10_src_klass, src);
  2435     Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes());
       
  2436     __ movq(r10_src_klass, src_klass_addr);
       
  2437 #ifdef ASSERT
  2479 #ifdef ASSERT
  2438     //  assert(src->klass() != NULL);
  2480     //  assert(src->klass() != NULL);
  2439     BLOCK_COMMENT("assert klasses not null");
  2481     BLOCK_COMMENT("assert klasses not null");
  2440     { Label L1, L2;
  2482     { Label L1, L2;
  2441       __ testq(r10_src_klass, r10_src_klass);
  2483       __ testq(r10_src_klass, r10_src_klass);
  2442       __ jcc(Assembler::notZero, L2);   // it is broken if klass is NULL
  2484       __ jcc(Assembler::notZero, L2);   // it is broken if klass is NULL
  2443       __ bind(L1);
  2485       __ bind(L1);
  2444       __ stop("broken null klass");
  2486       __ stop("broken null klass");
  2445       __ bind(L2);
  2487       __ bind(L2);
  2446       __ cmpq(dst_klass_addr, 0);
  2488       __ load_klass(r9_dst_klass, dst);
       
  2489       __ cmpq(r9_dst_klass, 0);
  2447       __ jcc(Assembler::equal, L1);     // this would be broken also
  2490       __ jcc(Assembler::equal, L1);     // this would be broken also
  2448       BLOCK_COMMENT("assert done");
  2491       BLOCK_COMMENT("assert done");
  2449     }
  2492     }
  2450 #endif
  2493 #endif
  2451 
  2494 
  2468     jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
  2511     jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
  2469     __ cmpl(rax_lh, objArray_lh);
  2512     __ cmpl(rax_lh, objArray_lh);
  2470     __ jcc(Assembler::equal, L_objArray);
  2513     __ jcc(Assembler::equal, L_objArray);
  2471 
  2514 
  2472     //  if (src->klass() != dst->klass()) return -1;
  2515     //  if (src->klass() != dst->klass()) return -1;
  2473     __ cmpq(r10_src_klass, dst_klass_addr);
  2516     __ load_klass(r9_dst_klass, dst);
       
  2517     __ cmpq(r10_src_klass, r9_dst_klass);
  2474     __ jcc(Assembler::notEqual, L_failed);
  2518     __ jcc(Assembler::notEqual, L_failed);
  2475 
  2519 
  2476     //  if (!src->is_Array()) return -1;
  2520     //  if (!src->is_Array()) return -1;
  2477     __ cmpl(rax_lh, Klass::_lh_neutral_value);
  2521     __ cmpl(rax_lh, Klass::_lh_neutral_value);
  2478     __ jcc(Assembler::greaterEqual, L_failed);
  2522     __ jcc(Assembler::greaterEqual, L_failed);
  2557   __ BIND(L_objArray);
  2601   __ BIND(L_objArray);
  2558     // live at this point:  r10_src_klass, src[_pos], dst[_pos]
  2602     // live at this point:  r10_src_klass, src[_pos], dst[_pos]
  2559 
  2603 
  2560     Label L_plain_copy, L_checkcast_copy;
  2604     Label L_plain_copy, L_checkcast_copy;
  2561     //  test array classes for subtyping
  2605     //  test array classes for subtyping
  2562     __ cmpq(r10_src_klass, dst_klass_addr); // usual case is exact equality
  2606     __ load_klass(r9_dst_klass, dst);
       
  2607     __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality
  2563     __ jcc(Assembler::notEqual, L_checkcast_copy);
  2608     __ jcc(Assembler::notEqual, L_checkcast_copy);
  2564 
  2609 
  2565     // Identically typed arrays can be copied without element-wise checks.
  2610     // Identically typed arrays can be copied without element-wise checks.
  2566     arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  2611     arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  2567                            r10, L_failed);
  2612                            r10, L_failed);
  2568 
  2613 
  2569     __ leaq(from, Address(src, src_pos, Address::times_8,
  2614     __ leaq(from, Address(src, src_pos, TIMES_OOP,
  2570                  arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
  2615                  arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
  2571     __ leaq(to,   Address(dst, dst_pos, Address::times_8,
  2616     __ leaq(to,   Address(dst, dst_pos, TIMES_OOP,
  2572                  arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
  2617                   arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
  2573     __ movslq(count, r11_length); // length
  2618     __ movslq(count, r11_length); // length
  2574   __ BIND(L_plain_copy);
  2619   __ BIND(L_plain_copy);
  2575     __ jump(RuntimeAddress(oop_copy_entry));
  2620     __ jump(RuntimeAddress(oop_copy_entry));
  2576 
  2621 
  2577   __ BIND(L_checkcast_copy);
  2622   __ BIND(L_checkcast_copy);
  2578     // live at this point:  r10_src_klass, !r11_length
  2623     // live at this point:  r10_src_klass, !r11_length
  2579     {
  2624     {
  2580       // assert(r11_length == C_RARG4); // will reload from here
  2625       // assert(r11_length == C_RARG4); // will reload from here
  2581       Register r11_dst_klass = r11;
  2626       Register r11_dst_klass = r11;
  2582       __ movq(r11_dst_klass, dst_klass_addr);
  2627       __ load_klass(r11_dst_klass, dst);
  2583 
  2628 
  2584       // Before looking at dst.length, make sure dst is also an objArray.
  2629       // Before looking at dst.length, make sure dst is also an objArray.
  2585       __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh);
  2630       __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh);
  2586       __ jcc(Assembler::notEqual, L_failed);
  2631       __ jcc(Assembler::notEqual, L_failed);
  2587 
  2632 
  2591                              rax, L_failed);
  2636                              rax, L_failed);
  2592 #else
  2637 #else
  2593       __ movl(r11_length, C_RARG4);     // reload
  2638       __ movl(r11_length, C_RARG4);     // reload
  2594       arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  2639       arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  2595                              rax, L_failed);
  2640                              rax, L_failed);
  2596       __ movl(r11_dst_klass, dst_klass_addr); // reload
  2641       __ load_klass(r11_dst_klass, dst); // reload
  2597 #endif
  2642 #endif
  2598 
  2643 
  2599       // Marshal the base address arguments now, freeing registers.
  2644       // Marshal the base address arguments now, freeing registers.
  2600       __ leaq(from, Address(src, src_pos, Address::times_8,
  2645       __ leaq(from, Address(src, src_pos, TIMES_OOP,
  2601                    arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
  2646                    arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
  2602       __ leaq(to,   Address(dst, dst_pos, Address::times_8,
  2647       __ leaq(to,   Address(dst, dst_pos, TIMES_OOP,
  2603                    arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
  2648                    arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
  2604       __ movl(count, C_RARG4);          // length (reloaded)
  2649       __ movl(count, C_RARG4);          // length (reloaded)
  2605       Register sco_temp = c_rarg3;      // this register is free now
  2650       Register sco_temp = c_rarg3;      // this register is free now
  2606       assert_different_registers(from, to, count, sco_temp,
  2651       assert_different_registers(from, to, count, sco_temp,
  2607                                  r11_dst_klass, r10_src_klass);
  2652                                  r11_dst_klass, r10_src_klass);
  2646     StubRoutines::_jbyte_arraycopy           = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
  2691     StubRoutines::_jbyte_arraycopy           = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
  2647 
  2692 
  2648     StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
  2693     StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
  2649     StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, "jshort_arraycopy");
  2694     StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, "jshort_arraycopy");
  2650 
  2695 
  2651     StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
  2696     StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
  2652     StubRoutines::_jint_arraycopy            = generate_conjoint_int_copy(false, "jint_arraycopy");
  2697     StubRoutines::_jint_arraycopy            = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
  2653 
  2698 
  2654     StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, false, "jlong_disjoint_arraycopy");
  2699     StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, false, "jlong_disjoint_arraycopy");
  2655     StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, "jlong_arraycopy");
  2700     StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, "jlong_arraycopy");
  2656 
  2701 
  2657     StubRoutines::_oop_disjoint_arraycopy    = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy");
  2702 
  2658     StubRoutines::_oop_arraycopy             = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy");
  2703     if (UseCompressedOops) {
       
  2704       StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy");
       
  2705       StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, "oop_arraycopy");
       
  2706     } else {
       
  2707       StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy");
       
  2708       StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy");
       
  2709     }
  2659 
  2710 
  2660     StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
  2711     StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
  2661     StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
  2712     StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
  2662     StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");
  2713     StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");
  2663 
  2714