src/hotspot/cpu/sparc/stubGenerator_sparc.cpp
changeset 55490 3f3dc00a69a5
parent 53591 bf4c38b9afaf
child 59249 29b0d0b61615

--- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	(55489:c749ecf599c0)
+++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	(55490:3f3dc00a69a5)
@@ -1074,10 +1074,21 @@
       __ srl(left_shift, LogBitsPerByte, left_shift);    // misaligned bytes
       __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
       __ delayed()->add(end_from, left_shift, end_from); // restore address
   }
 
+  address generate_unsafecopy_common_error_exit() {
+    address start_pc = __ pc();
+    if (UseBlockCopy) {
+      __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT);
+      __ membar(Assembler::StoreLoad);
+    }
+    __ retl();
+    __ delayed()->mov(G0, O0); // return 0
+    return start_pc;
+  }
+
   //
   //  Generate stub for disjoint byte copy.  If "aligned" is true, the
   //  "from" and "to" addresses are assumed to be heapword aligned.
   //
   // Arguments for generated stub:
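
Note: the stub added above is the common landing pad for Unsafe.copyMemory copies that take a page fault. It resets %asi on the UseBlockCopy path (a faulting block copy may leave a non-default ASI behind), orders outstanding stores with a StoreLoad membar, and returns 0 like a normal copy exit. Below is a rough sketch of how the platform signal handler is expected to route a fault here; the handler is not part of this file, and contains_pc / page_error_continue_pc / handle_unsafe_access are assumed from the shared UnsafeCopyMemory work (JDK-8191278):

    // Assumed handler shape, simplified. 'pc'/'npc' come from the SPARC
    // signal context; 'stub' is the continuation the handler installs.
    if (sig == SIGBUS && thread->doing_unsafe_access()) {
      address next_pc = npc;  // default: architectural next pc
      if (UnsafeCopyMemory::contains_pc(pc)) {
        // Fault inside a marked copy loop: resume at the common error
        // exit registered via UnsafeCopyMemory::set_common_exit_stub_pc().
        next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
      }
      stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
    }
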
@@ -1105,65 +1116,70 @@
       *entry = __ pc();
       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
       BLOCK_COMMENT("Entry:");
     }
 
-    // for short arrays, just do single element copy
-    __ cmp(count, 23); // 16 + 7
-    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
-    __ delayed()->mov(G0, offset);
-
-    if (aligned) {
-      // 'aligned' == true when it is known statically during compilation
-      // of this arraycopy call site that both 'from' and 'to' addresses
-      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
-      //
-      // Aligned arrays have 4 bytes alignment in 32-bits VM
-      // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM
-      //
-    } else {
-      // copy bytes to align 'to' on 8 byte boundary
-      __ andcc(to, 7, G1); // misaligned bytes
-      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
-      __ delayed()->neg(G1);
-      __ inc(G1, 8);       // bytes need to copy to next 8-bytes alignment
-      __ sub(count, G1, count);
-    __ BIND(L_align);
-      __ ldub(from, 0, O3);
-      __ deccc(G1);
-      __ inc(from);
-      __ stb(O3, to, 0);
-      __ br(Assembler::notZero, false, Assembler::pt, L_align);
-      __ delayed()->inc(to);
-    __ BIND(L_skip_alignment);
-    }
-    if (!aligned) {
-      // Copy with shift 16 bytes per iteration if arrays do not have
-      // the same alignment mod 8, otherwise fall through to the next
-      // code for aligned copy.
-      // The compare above (count >= 23) guarantes 'count' >= 16 bytes.
-      // Also jump over aligned copy after the copy with shift completed.
-
-      copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
-    }
-
-    // Both array are 8 bytes aligned, copy 16 bytes at a time
-      __ and3(count, 7, G4); // Save count
-      __ srl(count, 3, count);
-     generate_disjoint_long_copy_core(aligned);
-      __ mov(G4, count);     // Restore count
-
-    // copy tailing bytes
-    __ BIND(L_copy_byte);
-      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_byte_loop);
-      __ ldub(from, offset, O3);
-      __ deccc(count);
-      __ stb(O3, to, offset);
-      __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
-      __ delayed()->inc(offset);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+
+      // for short arrays, just do single element copy
+      __ cmp(count, 23); // 16 + 7
+      __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
+      __ delayed()->mov(G0, offset);
+
+      if (aligned) {
+        // 'aligned' == true when it is known statically during compilation
+        // of this arraycopy call site that both 'from' and 'to' addresses
+        // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
+        //
+        // Aligned arrays have 4 bytes alignment in 32-bits VM
+        // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM
+        //
+      } else {
+        // copy bytes to align 'to' on 8 byte boundary
+        __ andcc(to, 7, G1); // misaligned bytes
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
+        __ delayed()->neg(G1);
+        __ inc(G1, 8);       // bytes need to copy to next 8-bytes alignment
+        __ sub(count, G1, count);
+      __ BIND(L_align);
+        __ ldub(from, 0, O3);
+        __ deccc(G1);
+        __ inc(from);
+        __ stb(O3, to, 0);
+        __ br(Assembler::notZero, false, Assembler::pt, L_align);
+        __ delayed()->inc(to);
+      __ BIND(L_skip_alignment);
+      }
+      if (!aligned) {
+        // Copy with shift 16 bytes per iteration if arrays do not have
+        // the same alignment mod 8, otherwise fall through to the next
+        // code for aligned copy.
+        // The compare above (count >= 23) guarantes 'count' >= 16 bytes.
+        // Also jump over aligned copy after the copy with shift completed.
+
+        copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
+      }
+
+      // Both array are 8 bytes aligned, copy 16 bytes at a time
+      __ and3(count, 7, G4); // Save count
+      __ srl(count, 3, count);
+      generate_disjoint_long_copy_core(aligned);
+      __ mov(G4, count);     // Restore count
+
+      // copy tailing bytes
+      __ BIND(L_copy_byte);
+        __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
+        __ align(OptoLoopAlignment);
+      __ BIND(L_copy_byte_loop);
+        __ ldub(from, offset, O3);
+        __ deccc(count);
+        __ stb(O3, to, offset);
+        __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
+        __ delayed()->inc(offset);
+    }
 
     __ BIND(L_exit);
       // O3, O4 are used as temp registers
      inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
      __ retl();
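
Note: every copy stub body is now wrapped in an UnsafeCopyMemoryMark scope. The mark is an RAII helper from the shared stub-code generator: add_entry is !aligned because Unsafe.copyMemory only reaches the unaligned variants, and the third argument (false, i.e. do not continue at scope end) makes a fault resume at the common error exit rather than at the end of the marked range. A minimal self-contained model of the bookkeeping this implies follows; the names and layout are illustrative, not the HotSpot implementation:

    #include <cstddef>

    typedef unsigned char* address;

    // Each mark records the pc range of one generated copy loop plus the
    // pc at which a page fault inside that range should resume.
    struct UcmEntry {
      address start_pc;   // recorded when the mark is constructed
      address end_pc;     // recorded when the mark goes out of scope
      address error_pc;   // common error exit (or scope end, if requested)
    };

    static UcmEntry ucm_table[8];   // cf. UCM_TABLE_MAX_ENTRIES below
    static int      ucm_used = 0;

    // The signal handler's lookup; a linear scan is fine for 8 entries.
    static address page_error_continue_pc(address pc) {
      for (int i = 0; i < ucm_used; i++) {
        if (pc >= ucm_table[i].start_pc && pc < ucm_table[i].end_pc) {
          return ucm_table[i].error_pc;
        }
      }
      return NULL;   // the fault was not inside a marked copy stub
    }
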
@@ -1205,74 +1221,79 @@
       BLOCK_COMMENT("Entry:");
     }
 
     array_overlap_test(nooverlap_target, 0);
 
-    __ add(to, count, end_to);       // offset after last copied element
-
-    // for short arrays, just do single element copy
-    __ cmp(count, 23); // 16 + 7
-    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
-    __ delayed()->add(from, count, end_from);
-
-    {
-      // Align end of arrays since they could be not aligned even
-      // when arrays itself are aligned.
-
-      // copy bytes to align 'end_to' on 8 byte boundary
-      __ andcc(end_to, 7, G1); // misaligned bytes
-      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
-      __ delayed()->nop();
-      __ sub(count, G1, count);
-    __ BIND(L_align);
-      __ dec(end_from);
-      __ dec(end_to);
-      __ ldub(end_from, 0, O3);
-      __ deccc(G1);
-      __ brx(Assembler::notZero, false, Assembler::pt, L_align);
-      __ delayed()->stb(O3, end_to, 0);
-    __ BIND(L_skip_alignment);
-    }
-    if (aligned) {
-      // Both arrays are aligned to 8-bytes in 64-bits VM.
-      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
-      // in unaligned case.
-      __ dec(count, 16);
-    } else {
-      // Copy with shift 16 bytes per iteration if arrays do not have
-      // the same alignment mod 8, otherwise jump to the next
-      // code for aligned copy (and substracting 16 from 'count' before jump).
-      // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
-      // Also jump over aligned copy after the copy with shift completed.
-
-      copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
-                                        L_aligned_copy, L_copy_byte);
-    }
-    // copy 4 elements (16 bytes) at a time
-      __ align(OptoLoopAlignment);
-    __ BIND(L_aligned_copy);
-      __ dec(end_from, 16);
-      __ ldx(end_from, 8, O3);
-      __ ldx(end_from, 0, O4);
-      __ dec(end_to, 16);
-      __ deccc(count, 16);
-      __ stx(O3, end_to, 8);
-      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
-      __ delayed()->stx(O4, end_to, 0);
-      __ inc(count, 16);
-
-    // copy 1 element (2 bytes) at a time
-    __ BIND(L_copy_byte);
-      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_byte_loop);
-      __ dec(end_from);
-      __ dec(end_to);
-      __ ldub(end_from, 0, O4);
-      __ deccc(count);
-      __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
-      __ delayed()->stb(O4, end_to, 0);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+
+      __ add(to, count, end_to);       // offset after last copied element
+
+      // for short arrays, just do single element copy
+      __ cmp(count, 23); // 16 + 7
+      __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
+      __ delayed()->add(from, count, end_from);
+
+      {
+        // Align end of arrays since they could be not aligned even
+        // when arrays itself are aligned.
+
+        // copy bytes to align 'end_to' on 8 byte boundary
+        __ andcc(end_to, 7, G1); // misaligned bytes
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
+        __ delayed()->nop();
+        __ sub(count, G1, count);
+      __ BIND(L_align);
+        __ dec(end_from);
+        __ dec(end_to);
+        __ ldub(end_from, 0, O3);
+        __ deccc(G1);
+        __ brx(Assembler::notZero, false, Assembler::pt, L_align);
+        __ delayed()->stb(O3, end_to, 0);
+      __ BIND(L_skip_alignment);
+      }
+      if (aligned) {
+        // Both arrays are aligned to 8-bytes in 64-bits VM.
+        // The 'count' is decremented in copy_16_bytes_backward_with_shift()
+        // in unaligned case.
+        __ dec(count, 16);
+      } else {
+        // Copy with shift 16 bytes per iteration if arrays do not have
+        // the same alignment mod 8, otherwise jump to the next
+        // code for aligned copy (and substracting 16 from 'count' before jump).
+        // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
+        // Also jump over aligned copy after the copy with shift completed.
+
+       copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
+                                          L_aligned_copy, L_copy_byte);
+      }
+      // copy 4 elements (16 bytes) at a time
+        __ align(OptoLoopAlignment);
+      __ BIND(L_aligned_copy);
+        __ dec(end_from, 16);
+        __ ldx(end_from, 8, O3);
+        __ ldx(end_from, 0, O4);
+        __ dec(end_to, 16);
+        __ deccc(count, 16);
+        __ stx(O3, end_to, 8);
+        __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
+        __ delayed()->stx(O4, end_to, 0);
+        __ inc(count, 16);
+
+      // copy 1 element (2 bytes) at a time
+      __ BIND(L_copy_byte);
+        __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
+        __ align(OptoLoopAlignment);
+      __ BIND(L_copy_byte_loop);
+        __ dec(end_from);
+        __ dec(end_to);
+        __ ldub(end_from, 0, O4);
+        __ deccc(count);
+        __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
+        __ delayed()->stb(O4, end_to, 0);
+    }
 
     __ BIND(L_exit);
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
     __ retl();
@@ -1309,72 +1330,76 @@
       *entry = __ pc();
       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
       BLOCK_COMMENT("Entry:");
     }
 
-    // for short arrays, just do single element copy
-    __ cmp(count, 11); // 8 + 3  (22 bytes)
-    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
-    __ delayed()->mov(G0, offset);
-
-    if (aligned) {
-      // 'aligned' == true when it is known statically during compilation
-      // of this arraycopy call site that both 'from' and 'to' addresses
-      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
-      //
-      // Aligned arrays have 4 bytes alignment in 32-bits VM
-      // and 8 bytes - in 64-bits VM.
-      //
-    } else {
-      // copy 1 element if necessary to align 'to' on an 4 bytes
-      __ andcc(to, 3, G0);
-      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
-      __ delayed()->lduh(from, 0, O3);
-      __ inc(from, 2);
-      __ inc(to, 2);
-      __ dec(count);
-      __ sth(O3, to, -2);
-    __ BIND(L_skip_alignment);
-
-      // copy 2 elements to align 'to' on an 8 byte boundary
-      __ andcc(to, 7, G0);
-      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
-      __ delayed()->lduh(from, 0, O3);
-      __ dec(count, 2);
-      __ lduh(from, 2, O4);
-      __ inc(from, 4);
-      __ inc(to, 4);
-      __ sth(O3, to, -4);
-      __ sth(O4, to, -2);
-    __ BIND(L_skip_alignment2);
-    }
-    if (!aligned) {
-      // Copy with shift 16 bytes per iteration if arrays do not have
-      // the same alignment mod 8, otherwise fall through to the next
-      // code for aligned copy.
-      // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
-      // Also jump over aligned copy after the copy with shift completed.
-
-      copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
-    }
-
-    // Both array are 8 bytes aligned, copy 16 bytes at a time
-      __ and3(count, 3, G4); // Save
-      __ srl(count, 2, count);
-     generate_disjoint_long_copy_core(aligned);
-      __ mov(G4, count); // restore
-
-    // copy 1 element at a time
-    __ BIND(L_copy_2_bytes);
-      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_2_bytes_loop);
-      __ lduh(from, offset, O3);
-      __ deccc(count);
-      __ sth(O3, to, offset);
-      __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
-      __ delayed()->inc(offset, 2);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      // for short arrays, just do single element copy
+      __ cmp(count, 11); // 8 + 3  (22 bytes)
+      __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
+      __ delayed()->mov(G0, offset);
+
+      if (aligned) {
+        // 'aligned' == true when it is known statically during compilation
+        // of this arraycopy call site that both 'from' and 'to' addresses
+        // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
+        //
+        // Aligned arrays have 4 bytes alignment in 32-bits VM
+        // and 8 bytes - in 64-bits VM.
+        //
+      } else {
+        // copy 1 element if necessary to align 'to' on an 4 bytes
+        __ andcc(to, 3, G0);
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
+        __ delayed()->lduh(from, 0, O3);
+        __ inc(from, 2);
+        __ inc(to, 2);
+        __ dec(count);
+        __ sth(O3, to, -2);
+      __ BIND(L_skip_alignment);
+
+        // copy 2 elements to align 'to' on an 8 byte boundary
+        __ andcc(to, 7, G0);
+        __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
+        __ delayed()->lduh(from, 0, O3);
+        __ dec(count, 2);
+        __ lduh(from, 2, O4);
+        __ inc(from, 4);
+        __ inc(to, 4);
+        __ sth(O3, to, -4);
+        __ sth(O4, to, -2);
+      __ BIND(L_skip_alignment2);
+      }
+      if (!aligned) {
+        // Copy with shift 16 bytes per iteration if arrays do not have
+        // the same alignment mod 8, otherwise fall through to the next
+        // code for aligned copy.
+        // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
+        // Also jump over aligned copy after the copy with shift completed.
+
+        copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
+      }
+
+      // Both array are 8 bytes aligned, copy 16 bytes at a time
+        __ and3(count, 3, G4); // Save
+        __ srl(count, 2, count);
+       generate_disjoint_long_copy_core(aligned);
+        __ mov(G4, count); // restore
+
+      // copy 1 element at a time
+      __ BIND(L_copy_2_bytes);
+        __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
+        __ align(OptoLoopAlignment);
+      __ BIND(L_copy_2_bytes_loop);
+        __ lduh(from, offset, O3);
+        __ deccc(count);
+        __ sth(O3, to, offset);
+        __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
+        __ delayed()->inc(offset, 2);
+    }
 
    __ BIND(L_exit);
       // O3, O4 are used as temp registers
       inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
       __ retl();
@@ -1637,83 +1662,87 @@
       BLOCK_COMMENT("Entry:");
     }
 
     array_overlap_test(nooverlap_target, 1);
 
-    __ sllx(count, LogBytesPerShort, byte_count);
-    __ add(to, byte_count, end_to);  // offset after last copied element
-
-    // for short arrays, just do single element copy
-    __ cmp(count, 11); // 8 + 3  (22 bytes)
-    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
-    __ delayed()->add(from, byte_count, end_from);
-
-    {
-      // Align end of arrays since they could be not aligned even
-      // when arrays itself are aligned.
-
-      // copy 1 element if necessary to align 'end_to' on an 4 bytes
-      __ andcc(end_to, 3, G0);
-      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
-      __ delayed()->lduh(end_from, -2, O3);
-      __ dec(end_from, 2);
-      __ dec(end_to, 2);
-      __ dec(count);
-      __ sth(O3, end_to, 0);
-    __ BIND(L_skip_alignment);
-
-      // copy 2 elements to align 'end_to' on an 8 byte boundary
-      __ andcc(end_to, 7, G0);
-      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
-      __ delayed()->lduh(end_from, -2, O3);
-      __ dec(count, 2);
-      __ lduh(end_from, -4, O4);
-      __ dec(end_from, 4);
-      __ dec(end_to, 4);
-      __ sth(O3, end_to, 2);
-      __ sth(O4, end_to, 0);
-    __ BIND(L_skip_alignment2);
-    }
-    if (aligned) {
-      // Both arrays are aligned to 8-bytes in 64-bits VM.
-      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
-      // in unaligned case.
-      __ dec(count, 8);
-    } else {
-      // Copy with shift 16 bytes per iteration if arrays do not have
-      // the same alignment mod 8, otherwise jump to the next
-      // code for aligned copy (and substracting 8 from 'count' before jump).
-      // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
-      // Also jump over aligned copy after the copy with shift completed.
-
-      copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
-                                        L_aligned_copy, L_copy_2_bytes);
-    }
-    // copy 4 elements (16 bytes) at a time
-      __ align(OptoLoopAlignment);
-    __ BIND(L_aligned_copy);
-      __ dec(end_from, 16);
-      __ ldx(end_from, 8, O3);
-      __ ldx(end_from, 0, O4);
-      __ dec(end_to, 16);
-      __ deccc(count, 8);
-      __ stx(O3, end_to, 8);
-      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
-      __ delayed()->stx(O4, end_to, 0);
-      __ inc(count, 8);
-
-    // copy 1 element (2 bytes) at a time
-    __ BIND(L_copy_2_bytes);
-      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
-    __ BIND(L_copy_2_bytes_loop);
-      __ dec(end_from, 2);
-      __ dec(end_to, 2);
-      __ lduh(end_from, 0, O4);
-      __ deccc(count);
-      __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
-      __ delayed()->sth(O4, end_to, 0);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+
+      __ sllx(count, LogBytesPerShort, byte_count);
+      __ add(to, byte_count, end_to);  // offset after last copied element
+
+      // for short arrays, just do single element copy
+      __ cmp(count, 11); // 8 + 3  (22 bytes)
+      __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
+      __ delayed()->add(from, byte_count, end_from);
+
+      {
+        // Align end of arrays since they could be not aligned even
+        // when arrays itself are aligned.
+
+        // copy 1 element if necessary to align 'end_to' on an 4 bytes
+        __ andcc(end_to, 3, G0);
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
+        __ delayed()->lduh(end_from, -2, O3);
+        __ dec(end_from, 2);
+        __ dec(end_to, 2);
+        __ dec(count);
+        __ sth(O3, end_to, 0);
+      __ BIND(L_skip_alignment);
+
+        // copy 2 elements to align 'end_to' on an 8 byte boundary
+        __ andcc(end_to, 7, G0);
+        __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
+        __ delayed()->lduh(end_from, -2, O3);
+        __ dec(count, 2);
+        __ lduh(end_from, -4, O4);
+        __ dec(end_from, 4);
+        __ dec(end_to, 4);
+        __ sth(O3, end_to, 2);
+        __ sth(O4, end_to, 0);
+      __ BIND(L_skip_alignment2);
+      }
+      if (aligned) {
+        // Both arrays are aligned to 8-bytes in 64-bits VM.
+        // The 'count' is decremented in copy_16_bytes_backward_with_shift()
+        // in unaligned case.
+        __ dec(count, 8);
+      } else {
+        // Copy with shift 16 bytes per iteration if arrays do not have
+        // the same alignment mod 8, otherwise jump to the next
+        // code for aligned copy (and substracting 8 from 'count' before jump).
+        // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
+        // Also jump over aligned copy after the copy with shift completed.
+
+        copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
+                                        L_aligned_copy, L_copy_2_bytes);
+      }
+      // copy 4 elements (16 bytes) at a time
+        __ align(OptoLoopAlignment);
+      __ BIND(L_aligned_copy);
+        __ dec(end_from, 16);
+        __ ldx(end_from, 8, O3);
+        __ ldx(end_from, 0, O4);
+        __ dec(end_to, 16);
+        __ deccc(count, 8);
+        __ stx(O3, end_to, 8);
+        __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
+        __ delayed()->stx(O4, end_to, 0);
+        __ inc(count, 8);
+
+      // copy 1 element (2 bytes) at a time
+      __ BIND(L_copy_2_bytes);
+        __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
+      __ BIND(L_copy_2_bytes_loop);
+        __ dec(end_from, 2);
+        __ dec(end_to, 2);
+        __ lduh(end_from, 0, O4);
+        __ deccc(count);
+        __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
+        __ delayed()->sth(O4, end_to, 0);
+    }
     __ BIND(L_exit);
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
     __ retl();
     __ delayed()->mov(G0, O0); // return 0
@@ -1868,13 +1897,15 @@
     if (entry != NULL) {
       *entry = __ pc();
       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
       BLOCK_COMMENT("Entry:");
     }
-
-    generate_disjoint_int_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_disjoint_int_copy_core(aligned);
+    }
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
     __ retl();
     __ delayed()->mov(G0, O0); // return 0
     return start;
@@ -2003,13 +2034,15 @@
       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
       BLOCK_COMMENT("Entry:");
     }
 
     array_overlap_test(nooverlap_target, 2);
-
-    generate_conjoint_int_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_conjoint_int_copy_core(aligned);
+    }
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
     __ retl();
     __ delayed()->mov(G0, O0); // return 0
     return start;
@@ -2154,12 +2187,15 @@
       *entry = __ pc();
       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
       BLOCK_COMMENT("Entry:");
     }
 
-    generate_disjoint_long_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, true, false);
+      generate_disjoint_long_copy_core(aligned);
+    }
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
     __ retl();
     __ delayed()->mov(G0, O0); // return 0
     return start;
@@ -2230,13 +2266,15 @@
       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
       BLOCK_COMMENT("Entry:");
     }
 
     array_overlap_test(nooverlap_target, 3);
-
-    generate_conjoint_long_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, true, false);
+      generate_conjoint_long_copy_core(aligned);
+    }
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
     __ retl();
     __ delayed()->mov(G0, O0); // return 0
     return start;
@@ -2926,10 +2964,13 @@
     address entry_jshort_arraycopy;
     address entry_jint_arraycopy;
     address entry_oop_arraycopy;
     address entry_jlong_arraycopy;
     address entry_checkcast_arraycopy;
+
+    address ucm_common_error_exit       =  generate_unsafecopy_common_error_exit();
+    UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit);
 
     //*** jbyte
     // Always need aligned and unaligned versions
     StubRoutines::_jbyte_disjoint_arraycopy         = generate_disjoint_byte_copy(false, &entry,
                                                                                   "jbyte_disjoint_arraycopy");
@@ -5819,8 +5860,12 @@
     }
   }
 
 }; // end class declaration
 
+#define UCM_TABLE_MAX_ENTRIES 8
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }
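
Note: the table is allocated once, before the StubGenerator runs, so the marks created while the copy stubs are generated have slots to fill. Eight entries line up with the marked stubs in this file: the unaligned byte, short, and int copies (disjoint and conjoint) plus the two long copies, whose marks pass true unconditionally because Unsafe.copyMemory uses them in both flavors. A sketch of the shared helper being called, assuming it is a plain one-time allocation (the real definition lives with UnsafeCopyMemory in the shared runtime code):

    // Assumed implementation sketch, not part of this changeset.
    void UnsafeCopyMemory::create_table(int max_size) {
      UnsafeCopyMemory::_table            = new UnsafeCopyMemory[max_size];
      UnsafeCopyMemory::_table_max_length = max_size;
    }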