8146709: AArch64: Incorrect use of ADRP for byte_map_base
author aph
Tue, 19 Jan 2016 17:52:52 +0000
changeset 35579 d21d5a0db03f
parent 35578 33d25acfb1fd
child 35580 1225225bd34e
8146709: AArch64: Incorrect use of ADRP for byte_map_base
Reviewed-by: roland
hotspot/src/cpu/aarch64/vm/aarch64.ad
hotspot/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Mon Jan 18 21:34:28 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Tue Jan 19 17:52:52 2016 +0000
@@ -4442,11 +4442,7 @@
 
   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
     MacroAssembler _masm(&cbuf);
-    address page = (address)$src$$constant;
-    Register dst_reg = as_Register($dst$$reg);
-    unsigned long off;
-    __ adrp(dst_reg, ExternalAddress(page), off);
-    assert(off == 0, "assumed offset == 0");
+    __ load_byte_map_base($dst$$Register);
   %}
 
   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
--- a/hotspot/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Mon Jan 18 21:34:28 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Tue Jan 19 17:52:52 2016 +0000
@@ -1150,9 +1150,6 @@
 
 #if INCLUDE_ALL_GCS
 
-// Registers to be saved around calls to g1_wb_pre or g1_wb_post
-#define G1_SAVE_REGS (RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2))
-
     case g1_pre_barrier_slow_id:
       {
         StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
@@ -1194,10 +1191,10 @@
         __ b(done);
 
         __ bind(runtime);
-        __ push(G1_SAVE_REGS, sp);
+        __ push_call_clobbered_registers();
         f.load_argument(0, pre_val);
         __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
-        __ pop(G1_SAVE_REGS, sp);
+        __ pop_call_clobbered_registers();
         __ bind(done);
       }
       break;
@@ -1225,45 +1222,49 @@
         Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                         DirtyCardQueue::byte_offset_of_buf()));
 
-        const Register card_addr = rscratch2;
-        ExternalAddress cardtable((address) ct->byte_map_base);
+        const Register card_offset = rscratch2;
+        // LR is free here, so we can use it to hold the byte_map_base.
+        const Register byte_map_base = lr;
 
-        f.load_argument(0, card_addr);
-        __ lsr(card_addr, card_addr, CardTableModRefBS::card_shift);
-        unsigned long offset;
-        __ adrp(rscratch1, cardtable, offset);
-        __ add(card_addr, card_addr, rscratch1);
-        __ ldrb(rscratch1, Address(card_addr, offset));
+        assert_different_registers(card_offset, byte_map_base, rscratch1);
+
+        f.load_argument(0, card_offset);
+        __ lsr(card_offset, card_offset, CardTableModRefBS::card_shift);
+        __ load_byte_map_base(byte_map_base);
+        __ ldrb(rscratch1, Address(byte_map_base, card_offset));
         __ cmpw(rscratch1, (int)G1SATBCardTableModRefBS::g1_young_card_val());
         __ br(Assembler::EQ, done);
 
         assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0");
 
         __ membar(Assembler::StoreLoad);
-        __ ldrb(rscratch1, Address(card_addr, offset));
+        __ ldrb(rscratch1, Address(byte_map_base, card_offset));
         __ cbzw(rscratch1, done);
 
         // storing region crossing non-NULL, card is clean.
         // dirty card and log.
-        __ strb(zr, Address(card_addr, offset));
+        __ strb(zr, Address(byte_map_base, card_offset));
+
+        // Convert card offset into an address in card_addr
+        Register card_addr = card_offset;
+        __ add(card_addr, byte_map_base, card_addr);
 
         __ ldr(rscratch1, queue_index);
         __ cbz(rscratch1, runtime);
         __ sub(rscratch1, rscratch1, wordSize);
         __ str(rscratch1, queue_index);
 
-        const Register buffer_addr = r0;
+        // Reuse LR to hold buffer_addr
+        const Register buffer_addr = lr;
 
-        __ push(RegSet::of(r0, r1), sp);
         __ ldr(buffer_addr, buffer);
         __ str(card_addr, Address(buffer_addr, rscratch1));
-        __ pop(RegSet::of(r0, r1), sp);
         __ b(done);
 
         __ bind(runtime);
-        __ push(G1_SAVE_REGS, sp);
+        __ push_call_clobbered_registers();
         __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
-        __ pop(G1_SAVE_REGS, sp);
+        __ pop_call_clobbered_registers();
         __ bind(done);
 
       }
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Mon Jan 18 21:34:28 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Jan 19 17:52:52 2016 +0000
@@ -2301,6 +2301,30 @@
 }
 #endif
 
+void MacroAssembler::push_call_clobbered_registers() {
+  push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
+  // Push v0-v7, v16-v31 in pairs, from the highest pair down; each
+  // stpd uses pre-indexed addressing to move sp down by 2 * wordSize.
+  for (int i = 30; i >= 0; i -= 2) {
+    if (i <= v7->encoding() || i >= v16->encoding()) {
+        stpd(as_FloatRegister(i), as_FloatRegister(i+1),
+             Address(pre(sp, -2 * wordSize)));
+    }
+  }
+}
+
+void MacroAssembler::pop_call_clobbered_registers() {
+  // Pop v0-v7, v16-v31, lowest pair first (mirror of the push order).
+  for (int i = 0; i < 32; i += 2) {
+    if (i <= v7->encoding() || i >= v16->encoding()) {
+      ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(post(sp, 2 * wordSize)));
+    }
+  }
+  // Integer registers were pushed first, so they are restored last.
+  pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
+}
+
 void MacroAssembler::push_CPU_state(bool save_vectors) {
   push(0x3fffffff, sp);         // integer registers except lr & sp
 
@@ -3099,12 +3123,7 @@
 
   assert(CardTableModRefBS::dirty_card_val() == 0, "must be");
 
-  {
-    ExternalAddress cardtable((address) ct->byte_map_base);
-    unsigned long offset;
-    adrp(rscratch1, cardtable, offset);
-    assert(offset == 0, "byte_map_base is misaligned");
-  }
+  load_byte_map_base(rscratch1);
 
   if (UseCondCardMark) {
     Label L_already_dirty;
@@ -3596,12 +3615,10 @@
 
   lsr(card_addr, store_addr, CardTableModRefBS::card_shift);
 
-  unsigned long offset;
-  adrp(tmp2, cardtable, offset);
-
   // get the address of the card
+  load_byte_map_base(tmp2);
   add(card_addr, card_addr, tmp2);
-  ldrb(tmp2, Address(card_addr, offset));
+  ldrb(tmp2, Address(card_addr));
   cmpw(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
   br(Assembler::EQ, done);
 
@@ -3609,13 +3626,13 @@
 
   membar(Assembler::StoreLoad);
 
-  ldrb(tmp2, Address(card_addr, offset));
+  ldrb(tmp2, Address(card_addr));
   cbzw(tmp2, done);
 
   // storing a region crossing, non-NULL oop, card is clean.
   // dirty card and log.
 
-  strb(zr, Address(card_addr, offset));
+  strb(zr, Address(card_addr));
 
   ldr(rscratch1, queue_index);
   cbz(rscratch1, runtime);
@@ -3971,6 +3988,9 @@
   long offset_low = dest_page - low_page;
   long offset_high = dest_page - high_page;
 
+  assert(is_valid_AArch64_address(dest.target()), "bad address");
+  assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
+
   InstructionMark im(this);
   code_section()->relocate(inst_mark(), dest.rspec());
   // 8143067: Ensure that the adrp can reach the dest from anywhere within
@@ -3982,11 +4002,26 @@
     long offset = dest_page - pc_page;
     offset = (offset & ((1<<20)-1)) << 12;
     _adrp(reg1, pc()+offset);
-    movk(reg1, ((unsigned long)dest.target() >> 32) & 0xffff, 32);
+    movk(reg1, (unsigned long)dest.target() >> 32, 32);
   }
   byte_offset = (unsigned long)dest.target() & 0xfff;
 }
 
+void MacroAssembler::load_byte_map_base(Register reg) {
+  jbyte *byte_map_base =
+    ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base;
+
+  if (is_valid_AArch64_address((address)byte_map_base)) {
+    // Strictly speaking the byte_map_base isn't an address at all, and it
+    // might even be negative, so ADRP is used only when the value is valid.
+    unsigned long offset;
+    adrp(reg, ExternalAddress((address)byte_map_base), offset);
+    assert(offset == 0, "misaligned card table base");
+  } else {
+    mov(reg, (uint64_t)byte_map_base);
+  }
+}
+
 void MacroAssembler::build_frame(int framesize) {
   assert(framesize > 0, "framesize must be > 0");
   if (framesize < ((1 << 9) + 2 * wordSize)) {
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Mon Jan 18 21:34:28 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Jan 19 17:52:52 2016 +0000
@@ -437,6 +437,13 @@
   void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
   void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
 
+  // Push and pop everything that might be clobbered by a native
+  // runtime call except rscratch1 and rscratch2.  (They are always
+  // scratch, so we don't have to protect them.)  Only save the lower
+  // 64 bits of each vector register.
+  void push_call_clobbered_registers();
+  void pop_call_clobbered_registers();
+
   // now mov instructions for loading absolute addresses and 32 or
   // 64 bit integers
 
@@ -1116,6 +1123,15 @@
   // of your data.
   Address form_address(Register Rd, Register base, long byte_offset, int shift);
 
+  // Return true iff an address is within the 48-bit AArch64 address
+  // space, i.e. none of its bits 48..63 are set.
+  bool is_valid_AArch64_address(address a) {
+    return ((uint64_t)a >> 48) == 0;
+  }
+
+  // Load the base of the cardtable byte map into reg.
+  void load_byte_map_base(Register reg);
+
   // Prolog generator routines to support switch between x86 code and
   // generated ARM code
 
--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Mon Jan 18 21:34:28 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Tue Jan 19 17:52:52 2016 +0000
@@ -744,7 +744,7 @@
            __ sub(end, end, start); // number of bytes to copy
 
           const Register count = end; // 'end' register contains bytes count now
-          __ mov(scratch, (address)ct->byte_map_base);
+          __ load_byte_map_base(scratch);
           __ add(start, start, scratch);
           if (UseConcMarkSweepGC) {
             __ membar(__ StoreStore);