1105 *entry = __ pc(); |
1116 *entry = __ pc(); |
1106 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) |
1117 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) |
1107 BLOCK_COMMENT("Entry:"); |
1118 BLOCK_COMMENT("Entry:"); |
1108 } |
1119 } |
1109 |
1120 |
1110 // for short arrays, just do single element copy |
1121 { |
1111 __ cmp(count, 23); // 16 + 7 |
1122 // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit |
1112 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); |
1123 UnsafeCopyMemoryMark ucmm(this, !aligned, false); |
1113 __ delayed()->mov(G0, offset); |
1124 |
1114 |
1125 // for short arrays, just do single element copy |
1115 if (aligned) { |
1126 __ cmp(count, 23); // 16 + 7 |
1116 // 'aligned' == true when it is known statically during compilation |
1127 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); |
1117 // of this arraycopy call site that both 'from' and 'to' addresses |
1128 __ delayed()->mov(G0, offset); |
1118 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). |
1129 |
1119 // |
1130 if (aligned) { |
1120 // Aligned arrays have 4 bytes alignment in 32-bits VM |
1131 // 'aligned' == true when it is known statically during compilation |
1121 // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM |
1132 // of this arraycopy call site that both 'from' and 'to' addresses |
1122 // |
1133 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). |
1123 } else { |
1134 // |
1124 // copy bytes to align 'to' on 8 byte boundary |
1135 // Aligned arrays have 4 bytes alignment in 32-bits VM |
1125 __ andcc(to, 7, G1); // misaligned bytes |
1136 // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM |
1126 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); |
1137 // |
1127 __ delayed()->neg(G1); |
1138 } else { |
1128 __ inc(G1, 8); // bytes need to copy to next 8-bytes alignment |
1139 // copy bytes to align 'to' on 8 byte boundary |
1129 __ sub(count, G1, count); |
1140 __ andcc(to, 7, G1); // misaligned bytes |
1130 __ BIND(L_align); |
1141 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); |
1131 __ ldub(from, 0, O3); |
1142 __ delayed()->neg(G1); |
1132 __ deccc(G1); |
1143 __ inc(G1, 8); // bytes need to copy to next 8-bytes alignment |
1133 __ inc(from); |
1144 __ sub(count, G1, count); |
1134 __ stb(O3, to, 0); |
1145 __ BIND(L_align); |
1135 __ br(Assembler::notZero, false, Assembler::pt, L_align); |
1146 __ ldub(from, 0, O3); |
1136 __ delayed()->inc(to); |
1147 __ deccc(G1); |
1137 __ BIND(L_skip_alignment); |
1148 __ inc(from); |
1138 } |
1149 __ stb(O3, to, 0); |
1139 if (!aligned) { |
1150 __ br(Assembler::notZero, false, Assembler::pt, L_align); |
1140 // Copy with shift 16 bytes per iteration if arrays do not have |
1151 __ delayed()->inc(to); |
1141 // the same alignment mod 8, otherwise fall through to the next |
1152 __ BIND(L_skip_alignment); |
1142 // code for aligned copy. |
1153 } |
1143 // The compare above (count >= 23) guarantees 'count' >= 16 bytes. |
1154 if (!aligned) { |
1144 // Also jump over aligned copy after the copy with shift completed. |
1155 // Copy with shift 16 bytes per iteration if arrays do not have |
1145 |
1156 // the same alignment mod 8, otherwise fall through to the next |
1146 copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte); |
1157 // code for aligned copy. |
1147 } |
1158 // The compare above (count >= 23) guarantees 'count' >= 16 bytes. |
1148 |
1159 // Also jump over aligned copy after the copy with shift completed. |
1149 // Both arrays are 8 bytes aligned, copy 16 bytes at a time |
1160 |
|
1161 copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte); |
|
1162 } |
|
1163 |
|
1164 // Both arrays are 8 bytes aligned, copy 16 bytes at a time |
1150 __ and3(count, 7, G4); // Save count |
1165 __ and3(count, 7, G4); // Save count |
1151 __ srl(count, 3, count); |
1166 __ srl(count, 3, count); |
1152 generate_disjoint_long_copy_core(aligned); |
1167 generate_disjoint_long_copy_core(aligned); |
1153 __ mov(G4, count); // Restore count |
1168 __ mov(G4, count); // Restore count |
1154 |
1169 |
1155 // copy trailing bytes |
1170 // copy trailing bytes |
1156 __ BIND(L_copy_byte); |
1171 __ BIND(L_copy_byte); |
1157 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
1172 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
1158 __ align(OptoLoopAlignment); |
1173 __ align(OptoLoopAlignment); |
1159 __ BIND(L_copy_byte_loop); |
1174 __ BIND(L_copy_byte_loop); |
1160 __ ldub(from, offset, O3); |
1175 __ ldub(from, offset, O3); |
1161 __ deccc(count); |
1176 __ deccc(count); |
1162 __ stb(O3, to, offset); |
1177 __ stb(O3, to, offset); |
1163 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop); |
1178 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop); |
1164 __ delayed()->inc(offset); |
1179 __ delayed()->inc(offset); |
|
1180 } |
1165 |
1181 |
1166 __ BIND(L_exit); |
1182 __ BIND(L_exit); |
1167 // O3, O4 are used as temp registers |
1183 // O3, O4 are used as temp registers |
1168 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4); |
1184 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4); |
1169 __ retl(); |
1185 __ retl(); |
1205 BLOCK_COMMENT("Entry:"); |
1221 BLOCK_COMMENT("Entry:"); |
1206 } |
1222 } |
1207 |
1223 |
1208 array_overlap_test(nooverlap_target, 0); |
1224 array_overlap_test(nooverlap_target, 0); |
1209 |
1225 |
1210 __ add(to, count, end_to); // offset after last copied element |
|
1211 |
|
1212 // for short arrays, just do single element copy |
|
1213 __ cmp(count, 23); // 16 + 7 |
|
1214 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); |
|
1215 __ delayed()->add(from, count, end_from); |
|
1216 |
|
1217 { |
1226 { |
1218 // Align end of arrays since they could be not aligned even |
1227 // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit |
1219 // when the arrays themselves are aligned. |
1228 UnsafeCopyMemoryMark ucmm(this, !aligned, false); |
1220 |
1229 |
1221 // copy bytes to align 'end_to' on 8 byte boundary |
1230 __ add(to, count, end_to); // offset after last copied element |
1222 __ andcc(end_to, 7, G1); // misaligned bytes |
1231 |
1223 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); |
1232 // for short arrays, just do single element copy |
1224 __ delayed()->nop(); |
1233 __ cmp(count, 23); // 16 + 7 |
1225 __ sub(count, G1, count); |
1234 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); |
1226 __ BIND(L_align); |
1235 __ delayed()->add(from, count, end_from); |
1227 __ dec(end_from); |
1236 |
1228 __ dec(end_to); |
1237 { |
1229 __ ldub(end_from, 0, O3); |
1238 // Align end of arrays since they could be not aligned even |
1230 __ deccc(G1); |
1239 // when the arrays themselves are aligned. |
1231 __ brx(Assembler::notZero, false, Assembler::pt, L_align); |
1240 |
1232 __ delayed()->stb(O3, end_to, 0); |
1241 // copy bytes to align 'end_to' on 8 byte boundary |
1233 __ BIND(L_skip_alignment); |
1242 __ andcc(end_to, 7, G1); // misaligned bytes |
1234 } |
1243 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); |
1235 if (aligned) { |
1244 __ delayed()->nop(); |
1236 // Both arrays are aligned to 8-bytes in 64-bits VM. |
1245 __ sub(count, G1, count); |
1237 // The 'count' is decremented in copy_16_bytes_backward_with_shift() |
1246 __ BIND(L_align); |
1238 // in unaligned case. |
1247 __ dec(end_from); |
1239 __ dec(count, 16); |
1248 __ dec(end_to); |
1240 } else { |
1249 __ ldub(end_from, 0, O3); |
1241 // Copy with shift 16 bytes per iteration if arrays do not have |
1250 __ deccc(G1); |
1242 // the same alignment mod 8, otherwise jump to the next |
1251 __ brx(Assembler::notZero, false, Assembler::pt, L_align); |
1243 // code for aligned copy (and subtracting 16 from 'count' before jump). |
1252 __ delayed()->stb(O3, end_to, 0); |
1244 // The compare above (count >= 23) guarantees 'count' >= 16 bytes. |
1253 __ BIND(L_skip_alignment); |
1245 // Also jump over aligned copy after the copy with shift completed. |
1254 } |
1246 |
1255 if (aligned) { |
1247 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16, |
1256 // Both arrays are aligned to 8-bytes in 64-bits VM. |
1248 L_aligned_copy, L_copy_byte); |
1257 // The 'count' is decremented in copy_16_bytes_backward_with_shift() |
1249 } |
1258 // in unaligned case. |
1250 // copy 4 elements (16 bytes) at a time |
1259 __ dec(count, 16); |
1251 __ align(OptoLoopAlignment); |
1260 } else { |
1252 __ BIND(L_aligned_copy); |
1261 // Copy with shift 16 bytes per iteration if arrays do not have |
1253 __ dec(end_from, 16); |
1262 // the same alignment mod 8, otherwise jump to the next |
1254 __ ldx(end_from, 8, O3); |
1263 // code for aligned copy (and subtracting 16 from 'count' before jump). |
1255 __ ldx(end_from, 0, O4); |
1264 // The compare above (count >= 23) guarantees 'count' >= 16 bytes. |
1256 __ dec(end_to, 16); |
1265 // Also jump over aligned copy after the copy with shift completed. |
1257 __ deccc(count, 16); |
1266 |
1258 __ stx(O3, end_to, 8); |
1267 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16, |
1259 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); |
1268 L_aligned_copy, L_copy_byte); |
1260 __ delayed()->stx(O4, end_to, 0); |
1269 } |
1261 __ inc(count, 16); |
1270 // copy 4 elements (16 bytes) at a time |
1262 |
1271 __ align(OptoLoopAlignment); |
1263 // copy 1 element (1 byte) at a time |
1272 __ BIND(L_aligned_copy); |
1264 __ BIND(L_copy_byte); |
1273 __ dec(end_from, 16); |
1265 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
1274 __ ldx(end_from, 8, O3); |
1266 __ align(OptoLoopAlignment); |
1275 __ ldx(end_from, 0, O4); |
1267 __ BIND(L_copy_byte_loop); |
1276 __ dec(end_to, 16); |
1268 __ dec(end_from); |
1277 __ deccc(count, 16); |
1269 __ dec(end_to); |
1278 __ stx(O3, end_to, 8); |
1270 __ ldub(end_from, 0, O4); |
1279 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); |
1271 __ deccc(count); |
1280 __ delayed()->stx(O4, end_to, 0); |
1272 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop); |
1281 __ inc(count, 16); |
1273 __ delayed()->stb(O4, end_to, 0); |
1282 |
|
1283 // copy 1 element (1 byte) at a time |
|
1284 __ BIND(L_copy_byte); |
|
1285 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
|
1286 __ align(OptoLoopAlignment); |
|
1287 __ BIND(L_copy_byte_loop); |
|
1288 __ dec(end_from); |
|
1289 __ dec(end_to); |
|
1290 __ ldub(end_from, 0, O4); |
|
1291 __ deccc(count); |
|
1292 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop); |
|
1293 __ delayed()->stb(O4, end_to, 0); |
|
1294 } |
1274 |
1295 |
1275 __ BIND(L_exit); |
1296 __ BIND(L_exit); |
1276 // O3, O4 are used as temp registers |
1297 // O3, O4 are used as temp registers |
1277 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4); |
1298 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4); |
1278 __ retl(); |
1299 __ retl(); |
1309 *entry = __ pc(); |
1330 *entry = __ pc(); |
1310 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) |
1331 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) |
1311 BLOCK_COMMENT("Entry:"); |
1332 BLOCK_COMMENT("Entry:"); |
1312 } |
1333 } |
1313 |
1334 |
1314 // for short arrays, just do single element copy |
1335 { |
1315 __ cmp(count, 11); // 8 + 3 (22 bytes) |
1336 // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit |
1316 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); |
1337 UnsafeCopyMemoryMark ucmm(this, !aligned, false); |
1317 __ delayed()->mov(G0, offset); |
1338 // for short arrays, just do single element copy |
1318 |
1339 __ cmp(count, 11); // 8 + 3 (22 bytes) |
1319 if (aligned) { |
1340 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); |
1320 // 'aligned' == true when it is known statically during compilation |
1341 __ delayed()->mov(G0, offset); |
1321 // of this arraycopy call site that both 'from' and 'to' addresses |
1342 |
1322 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). |
1343 if (aligned) { |
1323 // |
1344 // 'aligned' == true when it is known statically during compilation |
1324 // Aligned arrays have 4 bytes alignment in 32-bits VM |
1345 // of this arraycopy call site that both 'from' and 'to' addresses |
1325 // and 8 bytes - in 64-bits VM. |
1346 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). |
1326 // |
1347 // |
1327 } else { |
1348 // Aligned arrays have 4 bytes alignment in 32-bits VM |
1328 // copy 1 element if necessary to align 'to' on a 4 byte boundary |
1349 // and 8 bytes - in 64-bits VM. |
1329 __ andcc(to, 3, G0); |
1350 // |
1330 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); |
1351 } else { |
1331 __ delayed()->lduh(from, 0, O3); |
1352 // copy 1 element if necessary to align 'to' on a 4 byte boundary |
1332 __ inc(from, 2); |
1353 __ andcc(to, 3, G0); |
1333 __ inc(to, 2); |
1354 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); |
1334 __ dec(count); |
1355 __ delayed()->lduh(from, 0, O3); |
1335 __ sth(O3, to, -2); |
1356 __ inc(from, 2); |
1336 __ BIND(L_skip_alignment); |
1357 __ inc(to, 2); |
1337 |
1358 __ dec(count); |
1338 // copy 2 elements to align 'to' on an 8 byte boundary |
1359 __ sth(O3, to, -2); |
1339 __ andcc(to, 7, G0); |
1360 __ BIND(L_skip_alignment); |
1340 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); |
1361 |
1341 __ delayed()->lduh(from, 0, O3); |
1362 // copy 2 elements to align 'to' on an 8 byte boundary |
1342 __ dec(count, 2); |
1363 __ andcc(to, 7, G0); |
1343 __ lduh(from, 2, O4); |
1364 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); |
1344 __ inc(from, 4); |
1365 __ delayed()->lduh(from, 0, O3); |
1345 __ inc(to, 4); |
1366 __ dec(count, 2); |
1346 __ sth(O3, to, -4); |
1367 __ lduh(from, 2, O4); |
1347 __ sth(O4, to, -2); |
1368 __ inc(from, 4); |
1348 __ BIND(L_skip_alignment2); |
1369 __ inc(to, 4); |
1349 } |
1370 __ sth(O3, to, -4); |
1350 if (!aligned) { |
1371 __ sth(O4, to, -2); |
1351 // Copy with shift 16 bytes per iteration if arrays do not have |
1372 __ BIND(L_skip_alignment2); |
1352 // the same alignment mod 8, otherwise fall through to the next |
1373 } |
1353 // code for aligned copy. |
1374 if (!aligned) { |
1354 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. |
1375 // Copy with shift 16 bytes per iteration if arrays do not have |
1355 // Also jump over aligned copy after the copy with shift completed. |
1376 // the same alignment mod 8, otherwise fall through to the next |
1356 |
1377 // code for aligned copy. |
1357 copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes); |
1378 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. |
1358 } |
1379 // Also jump over aligned copy after the copy with shift completed. |
1359 |
1380 |
1360 // Both arrays are 8 bytes aligned, copy 16 bytes at a time |
1381 copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes); |
1361 __ and3(count, 3, G4); // Save |
1382 } |
1362 __ srl(count, 2, count); |
1383 |
1363 generate_disjoint_long_copy_core(aligned); |
1384 // Both arrays are 8 bytes aligned, copy 16 bytes at a time |
1364 __ mov(G4, count); // restore |
1385 __ and3(count, 3, G4); // Save |
1365 |
1386 __ srl(count, 2, count); |
1366 // copy 1 element at a time |
1387 generate_disjoint_long_copy_core(aligned); |
1367 __ BIND(L_copy_2_bytes); |
1388 __ mov(G4, count); // restore |
1368 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
1389 |
1369 __ align(OptoLoopAlignment); |
1390 // copy 1 element at a time |
1370 __ BIND(L_copy_2_bytes_loop); |
1391 __ BIND(L_copy_2_bytes); |
1371 __ lduh(from, offset, O3); |
1392 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
1372 __ deccc(count); |
1393 __ align(OptoLoopAlignment); |
1373 __ sth(O3, to, offset); |
1394 __ BIND(L_copy_2_bytes_loop); |
1374 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop); |
1395 __ lduh(from, offset, O3); |
1375 __ delayed()->inc(offset, 2); |
1396 __ deccc(count); |
|
1397 __ sth(O3, to, offset); |
|
1398 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop); |
|
1399 __ delayed()->inc(offset, 2); |
|
1400 } |
1376 |
1401 |
1377 __ BIND(L_exit); |
1402 __ BIND(L_exit); |
1378 // O3, O4 are used as temp registers |
1403 // O3, O4 are used as temp registers |
1379 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4); |
1404 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4); |
1380 __ retl(); |
1405 __ retl(); |
1637 BLOCK_COMMENT("Entry:"); |
1662 BLOCK_COMMENT("Entry:"); |
1638 } |
1663 } |
1639 |
1664 |
1640 array_overlap_test(nooverlap_target, 1); |
1665 array_overlap_test(nooverlap_target, 1); |
1641 |
1666 |
1642 __ sllx(count, LogBytesPerShort, byte_count); |
|
1643 __ add(to, byte_count, end_to); // offset after last copied element |
|
1644 |
|
1645 // for short arrays, just do single element copy |
|
1646 __ cmp(count, 11); // 8 + 3 (22 bytes) |
|
1647 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); |
|
1648 __ delayed()->add(from, byte_count, end_from); |
|
1649 |
|
1650 { |
1667 { |
1651 // Align end of arrays since they could be not aligned even |
1668 // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit |
1652 // when the arrays themselves are aligned. |
1669 UnsafeCopyMemoryMark ucmm(this, !aligned, false); |
1653 |
1670 |
1654 // copy 1 element if necessary to align 'end_to' on a 4 byte boundary |
1671 __ sllx(count, LogBytesPerShort, byte_count); |
1655 __ andcc(end_to, 3, G0); |
1672 __ add(to, byte_count, end_to); // offset after last copied element |
1656 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); |
1673 |
1657 __ delayed()->lduh(end_from, -2, O3); |
1674 // for short arrays, just do single element copy |
1658 __ dec(end_from, 2); |
1675 __ cmp(count, 11); // 8 + 3 (22 bytes) |
1659 __ dec(end_to, 2); |
1676 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); |
1660 __ dec(count); |
1677 __ delayed()->add(from, byte_count, end_from); |
1661 __ sth(O3, end_to, 0); |
1678 |
1662 __ BIND(L_skip_alignment); |
1679 { |
1663 |
1680 // Align end of arrays since they could be not aligned even |
1664 // copy 2 elements to align 'end_to' on an 8 byte boundary |
1681 // when the arrays themselves are aligned. |
1665 __ andcc(end_to, 7, G0); |
1682 |
1666 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); |
1683 // copy 1 element if necessary to align 'end_to' on an 4 bytes |
1667 __ delayed()->lduh(end_from, -2, O3); |
1684 __ andcc(end_to, 3, G0); |
1668 __ dec(count, 2); |
1685 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); |
1669 __ lduh(end_from, -4, O4); |
1686 __ delayed()->lduh(end_from, -2, O3); |
1670 __ dec(end_from, 4); |
1687 __ dec(end_from, 2); |
1671 __ dec(end_to, 4); |
1688 __ dec(end_to, 2); |
1672 __ sth(O3, end_to, 2); |
1689 __ dec(count); |
1673 __ sth(O4, end_to, 0); |
1690 __ sth(O3, end_to, 0); |
1674 __ BIND(L_skip_alignment2); |
1691 __ BIND(L_skip_alignment); |
1675 } |
1692 |
1676 if (aligned) { |
1693 // copy 2 elements to align 'end_to' on an 8 byte boundary |
1677 // Both arrays are aligned to 8-bytes in 64-bits VM. |
1694 __ andcc(end_to, 7, G0); |
1678 // The 'count' is decremented in copy_16_bytes_backward_with_shift() |
1695 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); |
1679 // in unaligned case. |
1696 __ delayed()->lduh(end_from, -2, O3); |
1680 __ dec(count, 8); |
1697 __ dec(count, 2); |
1681 } else { |
1698 __ lduh(end_from, -4, O4); |
1682 // Copy with shift 16 bytes per iteration if arrays do not have |
1699 __ dec(end_from, 4); |
1683 // the same alignment mod 8, otherwise jump to the next |
1700 __ dec(end_to, 4); |
1684 // code for aligned copy (and subtracting 8 from 'count' before jump). |
1701 __ sth(O3, end_to, 2); |
1685 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. |
1702 __ sth(O4, end_to, 0); |
1686 // Also jump over aligned copy after the copy with shift completed. |
1703 __ BIND(L_skip_alignment2); |
1687 |
1704 } |
1688 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8, |
1705 if (aligned) { |
|
1706 // Both arrays are aligned to 8-bytes in 64-bits VM. |
|
1707 // The 'count' is decremented in copy_16_bytes_backward_with_shift() |
|
1708 // in unaligned case. |
|
1709 __ dec(count, 8); |
|
1710 } else { |
|
1711 // Copy with shift 16 bytes per iteration if arrays do not have |
|
1712 // the same alignment mod 8, otherwise jump to the next |
|
1713 // code for aligned copy (and subtracting 8 from 'count' before jump). |
|
1714 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. |
|
1715 // Also jump over aligned copy after the copy with shift completed. |
|
1716 |
|
1717 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8, |
1689 L_aligned_copy, L_copy_2_bytes); |
1718 L_aligned_copy, L_copy_2_bytes); |
1690 } |
1719 } |
1691 // copy 4 elements (16 bytes) at a time |
1720 // copy 4 elements (16 bytes) at a time |
1692 __ align(OptoLoopAlignment); |
1721 __ align(OptoLoopAlignment); |
1693 __ BIND(L_aligned_copy); |
1722 __ BIND(L_aligned_copy); |
1694 __ dec(end_from, 16); |
1723 __ dec(end_from, 16); |
1695 __ ldx(end_from, 8, O3); |
1724 __ ldx(end_from, 8, O3); |
1696 __ ldx(end_from, 0, O4); |
1725 __ ldx(end_from, 0, O4); |
1697 __ dec(end_to, 16); |
1726 __ dec(end_to, 16); |
1698 __ deccc(count, 8); |
1727 __ deccc(count, 8); |
1699 __ stx(O3, end_to, 8); |
1728 __ stx(O3, end_to, 8); |
1700 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); |
1729 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); |
1701 __ delayed()->stx(O4, end_to, 0); |
1730 __ delayed()->stx(O4, end_to, 0); |
1702 __ inc(count, 8); |
1731 __ inc(count, 8); |
1703 |
1732 |
1704 // copy 1 element (2 bytes) at a time |
1733 // copy 1 element (2 bytes) at a time |
1705 __ BIND(L_copy_2_bytes); |
1734 __ BIND(L_copy_2_bytes); |
1706 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
1735 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
1707 __ BIND(L_copy_2_bytes_loop); |
1736 __ BIND(L_copy_2_bytes_loop); |
1708 __ dec(end_from, 2); |
1737 __ dec(end_from, 2); |
1709 __ dec(end_to, 2); |
1738 __ dec(end_to, 2); |
1710 __ lduh(end_from, 0, O4); |
1739 __ lduh(end_from, 0, O4); |
1711 __ deccc(count); |
1740 __ deccc(count); |
1712 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop); |
1741 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop); |
1713 __ delayed()->sth(O4, end_to, 0); |
1742 __ delayed()->sth(O4, end_to, 0); |
1714 |
1743 } |
1715 __ BIND(L_exit); |
1744 __ BIND(L_exit); |
1716 // O3, O4 are used as temp registers |
1745 // O3, O4 are used as temp registers |
1717 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4); |
1746 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4); |
1718 __ retl(); |
1747 __ retl(); |
1719 __ delayed()->mov(G0, O0); // return 0 |
1748 __ delayed()->mov(G0, O0); // return 0 |