1998 // Arguments: |
1998 // Arguments: |
1999 // from: O0 |
1999 // from: O0 |
2000 // to: O1 |
2000 // to: O1 |
2001 // count: O2 treated as signed |
2001 // count: O2 treated as signed |
2002 // |
2002 // |
|
2003 // count -= 2; |
|
2004 // if ( count >= 0 ) { // >= 2 elements |
|
2005 // if ( count >= 6) { // >= 8 elements |
|
2006 // count -= 6; // original count - 8 |
|
2007 // do { |
|
2008 // copy_8_elements; |
|
2009 // count -= 8; |
|
2010 // } while ( count >= 0 ); |
|
2011 // count += 6; |
|
2012 // } |
|
2013 // if ( count >= 0 ) { // >= 2 elements |
|
2014 // do { |
|
2015 // copy_2_elements; |
|
2016 // } while ( (count=count-2) >= 0 ); |
|
2017 // } |
|
2018 // } |
|
2019 // count += 2; |
|
2020 // if ( count != 0 ) { // 1 element left |
|
2021 // copy_1_element; |
|
2022 // } |
|
2023 // |
2003 void generate_disjoint_long_copy_core(bool aligned) { |
2024 void generate_disjoint_long_copy_core(bool aligned) { |
2004 Label L_copy_8_bytes, L_copy_16_bytes, L_exit; |
2025 Label L_copy_8_bytes, L_copy_16_bytes, L_exit; |
2005 const Register from = O0; // source array address |
2026 const Register from = O0; // source array address |
2006 const Register to = O1; // destination array address |
2027 const Register to = O1; // destination array address |
2007 const Register count = O2; // elements count |
2028 const Register count = O2; // elements count |
2010 |
2031 |
2011 __ deccc(count, 2); |
2032 __ deccc(count, 2); |
2012 __ mov(G0, offset0); // offset from start of arrays (0) |
2033 __ mov(G0, offset0); // offset from start of arrays (0) |
2013 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); |
2034 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); |
2014 __ delayed()->add(offset0, 8, offset8); |
2035 __ delayed()->add(offset0, 8, offset8); |
|
2036 |
|
2037 // Copy by 64 bytes chunks |
|
2038 Label L_copy_64_bytes; |
|
2039 const Register from64 = O3; // source address |
|
2040 const Register to64 = G3; // destination address |
|
2041 __ subcc(count, 6, O3); |
|
2042 __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes ); |
|
2043 __ delayed()->mov(to, to64); |
|
2044 // Now we can use O4(offset0), O5(offset8) as temps |
|
2045 __ mov(O3, count); |
|
2046 __ mov(from, from64); |
|
2047 |
|
2048 __ align(16); |
|
2049 __ BIND(L_copy_64_bytes); |
|
2050 for( int off = 0; off < 64; off += 16 ) { |
|
2051 __ ldx(from64, off+0, O4); |
|
2052 __ ldx(from64, off+8, O5); |
|
2053 __ stx(O4, to64, off+0); |
|
2054 __ stx(O5, to64, off+8); |
|
2055 } |
|
2056 __ deccc(count, 8); |
|
2057 __ inc(from64, 64); |
|
2058 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_64_bytes); |
|
2059 __ delayed()->inc(to64, 64); |
|
2060 |
|
2061 // Restore O4(offset0), O5(offset8) |
|
2062 __ sub(from64, from, offset0); |
|
2063 __ inccc(count, 6); |
|
2064 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); |
|
2065 __ delayed()->add(offset0, 8, offset8); |
|
2066 |
|
2067 // Copy by 16 bytes chunks |
2015 __ align(16); |
2068 __ align(16); |
2016 __ BIND(L_copy_16_bytes); |
2069 __ BIND(L_copy_16_bytes); |
2017 __ ldx(from, offset0, O3); |
2070 __ ldx(from, offset0, O3); |
2018 __ ldx(from, offset8, G3); |
2071 __ ldx(from, offset8, G3); |
2019 __ deccc(count, 2); |
2072 __ deccc(count, 2); |
2021 __ inc(offset0, 16); |
2074 __ inc(offset0, 16); |
2022 __ stx(G3, to, offset8); |
2075 __ stx(G3, to, offset8); |
2023 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); |
2076 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); |
2024 __ delayed()->inc(offset8, 16); |
2077 __ delayed()->inc(offset8, 16); |
2025 |
2078 |
|
2079 // Copy last 8 bytes |
2026 __ BIND(L_copy_8_bytes); |
2080 __ BIND(L_copy_8_bytes); |
2027 __ inccc(count, 2); |
2081 __ inccc(count, 2); |
2028 __ brx(Assembler::zero, true, Assembler::pn, L_exit ); |
2082 __ brx(Assembler::zero, true, Assembler::pn, L_exit ); |
2029 __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs |
2083 __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs |
2030 __ ldx(from, offset0, O3); |
2084 __ ldx(from, offset0, O3); |