95 case 1: stb(dst, offs, base); break; |
95 case 1: stb(dst, offs, base); break; |
96 default: ShouldNotReachHere(); |
96 default: ShouldNotReachHere(); |
97 } |
97 } |
98 } |
98 } |
99 |
99 |
100 void MacroAssembler::align(int modulus) { |
100 void MacroAssembler::align(int modulus, int max, int rem) { |
101 while (offset() % modulus != 0) nop(); |
101 int padding = (rem + modulus - (offset() % modulus)) % modulus; |
|
102 if (padding > max) return; |
|
103 for (int c = (padding >> 2); c > 0; --c) { nop(); } |
102 } |
104 } |
103 |
105 |
104 // Issue instructions that calculate given TOC from global TOC. |
106 // Issue instructions that calculate given TOC from global TOC. |
105 void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16, |
107 void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16, |
106 bool add_relocation, bool emit_dummy_addr) { |
108 bool add_relocation, bool emit_dummy_addr) { |
184 } |
186 } |
185 } |
187 } |
186 |
188 |
187 #ifdef _LP64 |
189 #ifdef _LP64 |
188 // Patch compressed oops or klass constants. |
190 // Patch compressed oops or klass constants. |
|
191 // Assembler sequence is |
|
192 // 1) compressed oops: |
|
193 // lis rx = const.hi |
|
194 // ori rx = rx | const.lo |
|
195 // 2) compressed klass: |
|
196 // lis rx = const.hi |
|
197 // clrldi rx = rx & 0xFFFFffff // clearMS32b, optional |
|
198 // ori rx = rx | const.lo |
|
199 // The optional clrldi is skipped over when locating the preceding lis. |
189 int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) { |
200 int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) { |
190 assert(UseCompressedOops, "Should only patch compressed oops"); |
201 assert(UseCompressedOops, "Should only patch compressed oops"); |
191 |
202 |
192 const address inst2_addr = a; |
203 const address inst2_addr = a; |
193 const int inst2 = *(int *)inst2_addr; |
204 const int inst2 = *(int *)inst2_addr; |
194 |
205 |
195 // The relocation points to the second instruction, the addi, |
206 // The relocation points to the second instruction, the ori, |
196 // and the addi reads and writes the same register dst. |
207 // and the ori reads and writes the same register dst. |
197 const int dst = inv_rt_field(inst2); |
208 const int dst = inv_rta_field(inst2); |
198 assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst"); |
209 assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst"); |
199 // Now, find the preceding lis which writes to dst. |
210 // Now, find the preceding lis which writes to dst. |
200 int inst1 = 0; |
211 int inst1 = 0; |
201 address inst1_addr = inst2_addr - BytesPerInstWord; |
212 address inst1_addr = inst2_addr - BytesPerInstWord; |
202 bool inst1_found = false; |
213 bool inst1_found = false; |
203 while (inst1_addr >= bound) { |
214 while (inst1_addr >= bound) { |
208 assert(inst1_found, "inst is not lis"); |
219 assert(inst1_found, "inst is not lis"); |
209 |
220 |
210 int xc = (data >> 16) & 0xffff; |
221 int xc = (data >> 16) & 0xffff; |
211 int xd = (data >> 0) & 0xffff; |
222 int xd = (data >> 0) & 0xffff; |
212 |
223 |
213 set_imm((int *)inst1_addr,((short)(xc + ((xd & 0x8000) != 0 ? 1 : 0)))); // see enc_load_con_narrow1/2 |
224 set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo |
214 set_imm((int *)inst2_addr, (short)(xd)); |
225 set_imm((int *)inst2_addr, (short)(xd)); |
|
226 |
215 return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr); |
227 return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr); |
216 } |
228 } |
217 |
229 |
218 // Get compressed oop or klass constant. |
230 // Get compressed oop or klass constant. |
219 narrowOop MacroAssembler::get_narrow_oop(address a, address bound) { |
231 narrowOop MacroAssembler::get_narrow_oop(address a, address bound) { |
220 assert(UseCompressedOops, "Should only patch compressed oops"); |
232 assert(UseCompressedOops, "Should only patch compressed oops"); |
221 |
233 |
222 const address inst2_addr = a; |
234 const address inst2_addr = a; |
223 const int inst2 = *(int *)inst2_addr; |
235 const int inst2 = *(int *)inst2_addr; |
224 |
236 |
225 // The relocation points to the second instruction, the addi, |
237 // The relocation points to the second instruction, the ori, |
226 // and the addi reads and writes the same register dst. |
238 // and the ori reads and writes the same register dst. |
227 const int dst = inv_rt_field(inst2); |
239 const int dst = inv_rta_field(inst2); |
228 assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst"); |
240 assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst"); |
229 // Now, find the preceding lis which writes to dst. |
241 // Now, find the preceding lis which writes to dst. |
230 int inst1 = 0; |
242 int inst1 = 0; |
231 address inst1_addr = inst2_addr - BytesPerInstWord; |
243 address inst1_addr = inst2_addr - BytesPerInstWord; |
232 bool inst1_found = false; |
244 bool inst1_found = false; |
233 |
245 |
236 if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break;} |
248 if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break;} |
237 inst1_addr -= BytesPerInstWord; |
249 inst1_addr -= BytesPerInstWord; |
238 } |
250 } |
239 assert(inst1_found, "inst is not lis"); |
251 assert(inst1_found, "inst is not lis"); |
240 |
252 |
241 uint xl = ((unsigned int) (get_imm(inst2_addr,0) & 0xffff)); |
253 uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff)); |
242 uint xh = (((((xl & 0x8000) != 0 ? -1 : 0) + get_imm(inst1_addr,0)) & 0xffff) << 16); |
254 uint xh = (((get_imm(inst1_addr, 0)) & 0xffff) << 16); |
|
255 |
243 return (int) (xl | xh); |
256 return (int) (xl | xh); |
244 } |
257 } |
245 #endif // _LP64 |
258 #endif // _LP64 |
246 |
259 |
247 void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) { |
260 void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) { |
250 // we will end up with a failing NativeCall::verify(x) where x is |
263 // we will end up with a failing NativeCall::verify(x) where x is |
251 // the address of the constant pool entry. |
264 // the address of the constant pool entry. |
252 // FIXME: We should insert relocation information for oops at the constant |
265 // FIXME: We should insert relocation information for oops at the constant |
253 // pool entries instead of inserting it at the loads; patching of a constant |
266 // pool entries instead of inserting it at the loads; patching of a constant |
254 // pool entry should be less expensive. |
267 // pool entry should be less expensive. |
255 Unimplemented(); |
268 address oop_address = address_constant((address)a.value(), RelocationHolder::none); |
256 if (false) { |
269 // Relocate at the pc of the load. |
257 address oop_address = address_constant((address)a.value(), RelocationHolder::none); |
270 relocate(a.rspec()); |
258 // Relocate at the pc of the load. |
271 toc_offset = (int)(oop_address - code()->consts()->start()); |
259 relocate(a.rspec()); |
|
260 toc_offset = (int)(oop_address - code()->consts()->start()); |
|
261 } |
|
262 ld_largeoffset_unchecked(dst, toc_offset, toc, true); |
272 ld_largeoffset_unchecked(dst, toc_offset, toc, true); |
263 } |
273 } |
264 |
274 |
265 bool MacroAssembler::is_load_const_from_method_toc_at(address a) { |
275 bool MacroAssembler::is_load_const_from_method_toc_at(address a) { |
266 const address inst1_addr = a; |
276 const address inst1_addr = a; |
530 const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord; |
540 const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord; |
531 masm.bc(opposite_boint, biint, not_taken_pc); |
541 masm.bc(opposite_boint, biint, not_taken_pc); |
532 masm.b(dest); |
542 masm.b(dest); |
533 } |
543 } |
534 } |
544 } |
535 ICache::invalidate_range(instruction_addr, code_size); |
545 ICache::ppc64_flush_icache_bytes(instruction_addr, code_size); |
536 } |
546 } |
537 |
547 |
538 // Emit a NOT mt-safe patchable 64 bit absolute call/jump. |
548 // Emit a NOT mt-safe patchable 64 bit absolute call/jump. |
539 void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) { |
549 void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) { |
540 // get current pc |
550 // get current pc |
671 ResourceMark rm; |
681 ResourceMark rm; |
672 int code_size = MacroAssembler::bxx64_patchable_size; |
682 int code_size = MacroAssembler::bxx64_patchable_size; |
673 CodeBuffer buf(instruction_addr, code_size); |
683 CodeBuffer buf(instruction_addr, code_size); |
674 MacroAssembler masm(&buf); |
684 MacroAssembler masm(&buf); |
675 masm.bxx64_patchable(dest, relocInfo::none, link); |
685 masm.bxx64_patchable(dest, relocInfo::none, link); |
676 ICache::invalidate_range(instruction_addr, code_size); |
686 ICache::ppc64_flush_icache_bytes(instruction_addr, code_size); |
677 } |
687 } |
678 |
688 |
679 // Get dest address of a bxx64_patchable instruction. |
689 // Get dest address of a bxx64_patchable instruction. |
680 address MacroAssembler::get_dest_of_bxx64_patchable_at(address instruction_addr, bool link) { |
690 address MacroAssembler::get_dest_of_bxx64_patchable_at(address instruction_addr, bool link) { |
681 if (is_bxx64_patchable_variant1_at(instruction_addr, link)) { |
691 if (is_bxx64_patchable_variant1_at(instruction_addr, link)) { |
956 // conventions. |
966 // conventions. |
957 // We don't use the TOC in generated code, so there is no need to save |
967 // We don't use the TOC in generated code, so there is no need to save |
958 // and restore its value. |
968 // and restore its value. |
959 address MacroAssembler::call_c(Register fd) { |
969 address MacroAssembler::call_c(Register fd) { |
960 return branch_to(fd, /*and_link=*/true, |
970 return branch_to(fd, /*and_link=*/true, |
|
971 /*save toc=*/false, |
|
972 /*restore toc=*/false, |
|
973 /*load toc=*/true, |
|
974 /*load env=*/true); |
|
975 } |
|
976 |
|
977 address MacroAssembler::call_c_and_return_to_caller(Register fd) { |
|
978 return branch_to(fd, /*and_link=*/false, |
961 /*save toc=*/false, |
979 /*save toc=*/false, |
962 /*restore toc=*/false, |
980 /*restore toc=*/false, |
963 /*load toc=*/true, |
981 /*load toc=*/true, |
964 /*load env=*/true); |
982 /*load env=*/true); |
965 } |
983 } |
2313 // known pc and don't have to rely on the native call having a |
2331 // known pc and don't have to rely on the native call having a |
2314 // standard frame linkage where we can find the pc. |
2332 // standard frame linkage where we can find the pc. |
2315 if (last_Java_pc != noreg) |
2333 if (last_Java_pc != noreg) |
2316 std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread); |
2334 std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread); |
2317 |
2335 |
2318 // set last_Java_sp last |
2336 // Set last_Java_sp last. |
2319 std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread); |
2337 std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread); |
2320 } |
2338 } |
2321 |
2339 |
2322 void MacroAssembler::reset_last_Java_frame(void) { |
2340 void MacroAssembler::reset_last_Java_frame(void) { |
2323 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()), |
2341 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()), |
2450 } |
2468 } |
2451 } else { |
2469 } else { |
2452 load_const(R30, Universe::narrow_ptrs_base_addr(), tmp); |
2470 load_const(R30, Universe::narrow_ptrs_base_addr(), tmp); |
2453 ld(R30, 0, R30); |
2471 ld(R30, 0, R30); |
2454 } |
2472 } |
|
2473 } |
|
2474 |
|
2475 // Clear Array |
|
2476 // Kills both input registers. tmp == R0 is allowed. |
|
2477 void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) { |
|
2478 // Procedure for large arrays (uses data cache block zero instruction). |
|
2479 Label startloop, fast, fastloop, small_rest, restloop, done; |
|
2480 const int cl_size = VM_Version::get_cache_line_size(), |
|
2481 cl_dwords = cl_size>>3, |
|
2482 cl_dw_addr_bits = exact_log2(cl_dwords), |
|
2483 dcbz_min = 1; // Min count of dcbz executions, needs to be >0. |
|
2484 |
|
2485 //2: |
|
2486 cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included). |
|
2487 blt(CCR1, small_rest); // Too small. |
|
2488 rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line. |
|
2489 beq(CCR0, fast); // Already 128byte aligned. |
|
2490 |
|
2491 subfic(tmp, tmp, cl_dwords); |
|
2492 mtctr(tmp); // Set ctr to hit 128byte boundary (0<ctr<cl_dwords). |
|
2493 subf(cnt_dwords, tmp, cnt_dwords); // rest. |
|
2494 li(tmp, 0); |
|
2495 //10: |
|
2496 bind(startloop); // Clear at the beginning to reach 128byte boundary. |
|
2497 std(tmp, 0, base_ptr); // Clear 8byte aligned block. |
|
2498 addi(base_ptr, base_ptr, 8); |
|
2499 bdnz(startloop); |
|
2500 //13: |
|
2501 bind(fast); // Clear 128byte blocks. |
|
2502 srdi(tmp, cnt_dwords, cl_dw_addr_bits); // Loop count for 128byte loop (>0). |
|
2503 andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords. |
|
2504 mtctr(tmp); // Load counter. |
|
2505 //16: |
|
2506 bind(fastloop); |
|
2507 dcbz(base_ptr); // Clear 128byte aligned block. |
|
2508 addi(base_ptr, base_ptr, cl_size); |
|
2509 bdnz(fastloop); |
|
2510 if (InsertEndGroupPPC64) { endgroup(); } else { nop(); } |
|
2511 //20: |
|
2512 bind(small_rest); |
|
2513 cmpdi(CCR0, cnt_dwords, 0); // size 0? |
|
2514 beq(CCR0, done); // rest == 0 |
|
2515 li(tmp, 0); |
|
2516 mtctr(cnt_dwords); // Load counter. |
|
2517 //24: |
|
2518 bind(restloop); // Clear rest. |
|
2519 std(tmp, 0, base_ptr); // Clear 8byte aligned block. |
|
2520 addi(base_ptr, base_ptr, 8); |
|
2521 bdnz(restloop); |
|
2522 //27: |
|
2523 bind(done); |
2455 } |
2524 } |
2456 |
2525 |
2457 /////////////////////////////////////////// String intrinsics //////////////////////////////////////////// |
2526 /////////////////////////////////////////// String intrinsics //////////////////////////////////////////// |
2458 |
2527 |
2459 // Search for a single jchar in a jchar[]. |
2528 // Search for a single jchar in a jchar[]. |
2924 // READ: oop. KILL: R0. Volatile floats perhaps. |
2993 // READ: oop. KILL: R0. Volatile floats perhaps. |
2925 void MacroAssembler::verify_oop(Register oop, const char* msg) { |
2994 void MacroAssembler::verify_oop(Register oop, const char* msg) { |
2926 if (!VerifyOops) { |
2995 if (!VerifyOops) { |
2927 return; |
2996 return; |
2928 } |
2997 } |
2929 // will be preserved. |
2998 // Will be preserved. |
2930 Register tmp = R11; |
2999 Register tmp = R11; |
2931 assert(oop != tmp, "precondition"); |
3000 assert(oop != tmp, "precondition"); |
2932 unsigned int nbytes_save = 10*8; // 10 volatile gprs |
3001 unsigned int nbytes_save = 10*8; // 10 volatile gprs |
2933 address/* FunctionDescriptor** */fd = |
3002 address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address(); |
2934 StubRoutines::verify_oop_subroutine_entry_address(); |
|
2935 // save tmp |
3003 // save tmp |
2936 mr(R0, tmp); |
3004 mr(R0, tmp); |
2937 // kill tmp |
3005 // kill tmp |
2938 save_LR_CR(tmp); |
3006 save_LR_CR(tmp); |
2939 push_frame_abi112(nbytes_save, tmp); |
3007 push_frame_abi112(nbytes_save, tmp); |