--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Mon Jan 18 21:34:28 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Tue Jan 19 17:52:52 2016 +0000
@@ -4442,11 +4442,7 @@
enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
MacroAssembler _masm(&cbuf);
- address page = (address)$src$$constant;
- Register dst_reg = as_Register($dst$$reg);
- unsigned long off;
- __ adrp(dst_reg, ExternalAddress(page), off);
- assert(off == 0, "assumed offset == 0");
+ __ load_byte_map_base($dst$$Register); // $src is not read; the helper looks up byte_map_base itself
%}
enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
--- a/hotspot/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Mon Jan 18 21:34:28 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Tue Jan 19 17:52:52 2016 +0000
@@ -1150,9 +1150,6 @@
#if INCLUDE_ALL_GCS
-// Registers to be saved around calls to g1_wb_pre or g1_wb_post
-#define G1_SAVE_REGS (RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2))
-
case g1_pre_barrier_slow_id:
{
StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
@@ -1194,10 +1191,10 @@
__ b(done);
__ bind(runtime);
- __ push(G1_SAVE_REGS, sp);
+ __ push_call_clobbered_registers();
f.load_argument(0, pre_val);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
- __ pop(G1_SAVE_REGS, sp);
+ __ pop_call_clobbered_registers();
__ bind(done);
}
break;
@@ -1225,45 +1222,49 @@
Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
DirtyCardQueue::byte_offset_of_buf()));
- const Register card_addr = rscratch2;
- ExternalAddress cardtable((address) ct->byte_map_base);
+ const Register card_offset = rscratch2;
+ // LR is free here, so we can use it to hold the byte_map_base.
+ const Register byte_map_base = lr;
- f.load_argument(0, card_addr);
- __ lsr(card_addr, card_addr, CardTableModRefBS::card_shift);
- unsigned long offset;
- __ adrp(rscratch1, cardtable, offset);
- __ add(card_addr, card_addr, rscratch1);
- __ ldrb(rscratch1, Address(card_addr, offset));
+ assert_different_registers(card_offset, byte_map_base, rscratch1);
+
+ f.load_argument(0, card_offset);
+ __ lsr(card_offset, card_offset, CardTableModRefBS::card_shift);
+ __ load_byte_map_base(byte_map_base);
+ __ ldrb(rscratch1, Address(byte_map_base, card_offset));
__ cmpw(rscratch1, (int)G1SATBCardTableModRefBS::g1_young_card_val());
__ br(Assembler::EQ, done);
assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0");
__ membar(Assembler::StoreLoad);
- __ ldrb(rscratch1, Address(card_addr, offset));
+ __ ldrb(rscratch1, Address(byte_map_base, card_offset));
__ cbzw(rscratch1, done);
// storing region crossing non-NULL, card is clean.
// dirty card and log.
- __ strb(zr, Address(card_addr, offset));
+ __ strb(zr, Address(byte_map_base, card_offset));
+
+ // Convert card offset into an address in card_addr
+ Register card_addr = card_offset;
+ __ add(card_addr, byte_map_base, card_addr);
__ ldr(rscratch1, queue_index);
__ cbz(rscratch1, runtime);
__ sub(rscratch1, rscratch1, wordSize);
__ str(rscratch1, queue_index);
- const Register buffer_addr = r0;
+ // Reuse LR to hold buffer_addr
+ const Register buffer_addr = lr;
- __ push(RegSet::of(r0, r1), sp);
__ ldr(buffer_addr, buffer);
__ str(card_addr, Address(buffer_addr, rscratch1));
- __ pop(RegSet::of(r0, r1), sp);
__ b(done);
__ bind(runtime);
- __ push(G1_SAVE_REGS, sp);
+ __ push_call_clobbered_registers();
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
- __ pop(G1_SAVE_REGS, sp);
+ __ pop_call_clobbered_registers();
__ bind(done);
}
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon Jan 18 21:34:28 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Jan 19 17:52:52 2016 +0000
@@ -2301,6 +2301,30 @@
}
#endif
+void MacroAssembler::push_call_clobbered_registers() {
+ push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
+ // After the integer registers, push the low 64 bits of v0-v7 and v16-v31
+ // in pairs, highest pair first; v8-v15 fail the test below and are skipped.
+ for (int i = 30; i >= 0; i -= 2) {
+ if (i <= v7->encoding() || i >= v16->encoding()) {
+ stpd(as_FloatRegister(i), as_FloatRegister(i+1),
+ Address(pre(sp, -2 * wordSize)));
+ }
+ }
+}
+
+void MacroAssembler::pop_call_clobbered_registers() {
+ // Undo push_call_clobbered_registers(): reload v0-v7, v16-v31 pairs, lowest first.
+ for (int i = 0; i < 32; i += 2) {
+ if (i <= v7->encoding() || i >= v16->encoding()) {
+ ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+ Address(post(sp, 2 * wordSize)));
+ }
+ }
+ // Then restore r0-r18 minus rscratch1/rscratch2, which were pushed first.
+ pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
+}
+
void MacroAssembler::push_CPU_state(bool save_vectors) {
push(0x3fffffff, sp); // integer registers except lr & sp
@@ -3099,12 +3123,7 @@
assert(CardTableModRefBS::dirty_card_val() == 0, "must be");
- {
- ExternalAddress cardtable((address) ct->byte_map_base);
- unsigned long offset;
- adrp(rscratch1, cardtable, offset);
- assert(offset == 0, "byte_map_base is misaligned");
- }
+ load_byte_map_base(rscratch1);
if (UseCondCardMark) {
Label L_already_dirty;
@@ -3596,12 +3615,10 @@
lsr(card_addr, store_addr, CardTableModRefBS::card_shift);
- unsigned long offset;
- adrp(tmp2, cardtable, offset);
-
// get the address of the card
+ load_byte_map_base(tmp2);
add(card_addr, card_addr, tmp2);
- ldrb(tmp2, Address(card_addr, offset));
+ ldrb(tmp2, Address(card_addr));
cmpw(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
br(Assembler::EQ, done);
@@ -3609,13 +3626,13 @@
membar(Assembler::StoreLoad);
- ldrb(tmp2, Address(card_addr, offset));
+ ldrb(tmp2, Address(card_addr));
cbzw(tmp2, done);
// storing a region crossing, non-NULL oop, card is clean.
// dirty card and log.
- strb(zr, Address(card_addr, offset));
+ strb(zr, Address(card_addr));
ldr(rscratch1, queue_index);
cbz(rscratch1, runtime);
@@ -3971,6 +3988,9 @@
long offset_low = dest_page - low_page;
long offset_high = dest_page - high_page;
+ assert(is_valid_AArch64_address(dest.target()), "bad address");
+ assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
+
InstructionMark im(this);
code_section()->relocate(inst_mark(), dest.rspec());
// 8143067: Ensure that the adrp can reach the dest from anywhere within
@@ -3982,11 +4002,26 @@
long offset = dest_page - pc_page;
offset = (offset & ((1<<20)-1)) << 12;
_adrp(reg1, pc()+offset);
- movk(reg1, ((unsigned long)dest.target() >> 32) & 0xffff, 32);
+ movk(reg1, (unsigned long)dest.target() >> 32, 32);
}
byte_offset = (unsigned long)dest.target() & 0xfff;
}
+void MacroAssembler::load_byte_map_base(Register reg) {
+ jbyte *byte_map_base =
+ ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base;
+ // Emit code that leaves the card table's byte_map_base value in reg.
+ if (is_valid_AArch64_address((address)byte_map_base)) {
+ // Strictly speaking the byte_map_base isn't an address at all and might even
+ // be negative, so ADRP is used only when it fits; otherwise MOV the raw value.
+ unsigned long offset;
+ adrp(reg, ExternalAddress((address)byte_map_base), offset);
+ assert(offset == 0, "misaligned card table base");
+ } else {
+ mov(reg, (uint64_t)byte_map_base);
+ }
+}
+
void MacroAssembler::build_frame(int framesize) {
assert(framesize > 0, "framesize must be > 0");
if (framesize < ((1 << 9) + 2 * wordSize)) {
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Mon Jan 18 21:34:28 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Tue Jan 19 17:52:52 2016 +0000
@@ -437,6 +437,13 @@
void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
+ // Push and pop everything that might be clobbered by a native runtime
+ // call: r0-r18 except rscratch1 and rscratch2 (they are always scratch,
+ // so we don't have to protect them) plus v0-v7 and v16-v31. Only the
+ // lower 64 bits of each vector register are saved; pop mirrors push.
+ void push_call_clobbered_registers();
+ void pop_call_clobbered_registers();
+
// now mov instructions for loading absolute addresses and 32 or
// 64 bit integers
@@ -1116,6 +1123,15 @@
// of your data.
Address form_address(Register Rd, Register base, long byte_offset, int shift);
+ // Return true iff the top 16 bits of a are zero, i.e. a lies within
+ // the 48-bit AArch64 address space.
+ bool is_valid_AArch64_address(address a) {
+ return ((uint64_t)a >> 48) == 0;
+ }
+
+ // Load the cardtable's byte_map_base into reg, whether or not it is a valid address.
+ void load_byte_map_base(Register reg);
+
// Prolog generator routines to support switch between x86 code and
// generated ARM code
--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Mon Jan 18 21:34:28 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Tue Jan 19 17:52:52 2016 +0000
@@ -744,7 +744,7 @@
__ sub(end, end, start); // number of bytes to copy
const Register count = end; // 'end' register contains bytes count now
- __ mov(scratch, (address)ct->byte_map_base);
+ __ load_byte_map_base(scratch);
__ add(start, start, scratch);
if (UseConcMarkSweepGC) {
__ membar(__ StoreStore);