8169177: AArch64: SIGSEGV when "-XX:+ZeroTLAB" is specified along with GC options
author aph
Mon, 19 Dec 2016 02:33:30 -0800
changeset 42871 c89e1f0a084e
parent 42870 525f24ac5db0
child 42873 8ddcd9d9fc48
child 42874 973960866fa4
8169177: AArch64: SIGSEGV when "-XX:+ZeroTLAB" is specified along with GC options. Reviewed-by: aph. Contributed-by: kavitha.natarajan@linaro.org
hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp
hotspot/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp	Mon Dec 19 08:31:01 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp	Mon Dec 19 02:33:30 2016 -0800
@@ -195,95 +195,22 @@
   }
 }
 
-// Zero words; len is in bytes
-// Destroys all registers except addr
-// len must be a nonzero multiple of wordSize
-void C1_MacroAssembler::zero_memory(Register addr, Register len, Register t1) {
-  assert_different_registers(addr, len, t1, rscratch1, rscratch2);
-
-#ifdef ASSERT
-  { Label L;
-    tst(len, BytesPerWord - 1);
-    br(Assembler::EQ, L);
-    stop("len is not a multiple of BytesPerWord");
-    bind(L);
-  }
-#endif
-
-#ifndef PRODUCT
-  block_comment("zero memory");
-#endif
-
-  Label loop;
-  Label entry;
-
-//  Algorithm:
-//
-//    scratch1 = cnt & 7;
-//    cnt -= scratch1;
-//    p += scratch1;
-//    switch (scratch1) {
-//      do {
-//        cnt -= 8;
-//          p[-8] = 0;
-//        case 7:
-//          p[-7] = 0;
-//        case 6:
-//          p[-6] = 0;
-//          // ...
-//        case 1:
-//          p[-1] = 0;
-//        case 0:
-//          p += 8;
-//      } while (cnt);
-//    }
-
-  const int unroll = 8; // Number of str(zr) instructions we'll unroll
-
-  lsr(len, len, LogBytesPerWord);
-  andr(rscratch1, len, unroll - 1);  // tmp1 = cnt % unroll
-  sub(len, len, rscratch1);      // cnt -= unroll
-  // t1 always points to the end of the region we're about to zero
-  add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord);
-  adr(rscratch2, entry);
-  sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
-  br(rscratch2);
-  bind(loop);
-  sub(len, len, unroll);
-  for (int i = -unroll; i < 0; i++)
-    str(zr, Address(t1, i * wordSize));
-  bind(entry);
-  add(t1, t1, unroll * wordSize);
-  cbnz(len, loop);
-}
-
 // preserves obj, destroys len_in_bytes
 void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
+  assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
   Label done;
-  assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different");
-  assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord");
-  Register index = len_in_bytes;
-  // index is positive and ptr sized
-  subs(index, index, hdr_size_in_bytes);
+
+  // len_in_bytes is positive and ptr sized
+  subs(len_in_bytes, len_in_bytes, hdr_size_in_bytes);
   br(Assembler::EQ, done);
-  // note: for the remaining code to work, index must be a multiple of BytesPerWord
-#ifdef ASSERT
-  { Label L;
-    tst(index, BytesPerWord - 1);
-    br(Assembler::EQ, L);
-    stop("index is not a multiple of BytesPerWord");
-    bind(L);
-  }
-#endif
 
   // Preserve obj
   if (hdr_size_in_bytes)
     add(obj, obj, hdr_size_in_bytes);
-  zero_memory(obj, index, t1);
+  zero_memory(obj, len_in_bytes, t1);
   if (hdr_size_in_bytes)
     sub(obj, obj, hdr_size_in_bytes);
 
-  // done
   bind(done);
 }
 
@@ -294,57 +221,59 @@
 
   try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
 
-  initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2);
+  initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
 }
 
-void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) {
+void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
   assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
          "con_size_in_bytes is not multiple of alignment");
   const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
 
   initialize_header(obj, klass, noreg, t1, t2);
 
-  // clear rest of allocated space
-  const Register index = t2;
-  const int threshold = 16 * BytesPerWord;   // approximate break even point for code size (see comments below)
-  if (var_size_in_bytes != noreg) {
-    mov(index, var_size_in_bytes);
-    initialize_body(obj, index, hdr_size_in_bytes, t1);
-  } else if (con_size_in_bytes <= threshold) {
-    // use explicit null stores
-    int i = hdr_size_in_bytes;
-    if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) {
-      str(zr, Address(obj, i));
-      i += BytesPerWord;
-    }
-    for (; i < con_size_in_bytes; i += 2 * BytesPerWord)
-      stp(zr, zr, Address(obj, i));
-  } else if (con_size_in_bytes > hdr_size_in_bytes) {
-    block_comment("zero memory");
-    // use loop to null out the fields
+  if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
+     // clear rest of allocated space
+     const Register index = t2;
+     const int threshold = 16 * BytesPerWord;   // approximate break even point for code size (see comments below)
+     if (var_size_in_bytes != noreg) {
+       mov(index, var_size_in_bytes);
+       initialize_body(obj, index, hdr_size_in_bytes, t1);
+     } else if (con_size_in_bytes <= threshold) {
+       // use explicit null stores
+       int i = hdr_size_in_bytes;
+       if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) {
+         str(zr, Address(obj, i));
+         i += BytesPerWord;
+       }
+       for (; i < con_size_in_bytes; i += 2 * BytesPerWord)
+         stp(zr, zr, Address(obj, i));
+     } else if (con_size_in_bytes > hdr_size_in_bytes) {
+       block_comment("zero memory");
+      // use loop to null out the fields
 
-    int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
-    mov(index,  words / 8);
+       int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
+       mov(index,  words / 8);
 
-    const int unroll = 8; // Number of str(zr) instructions we'll unroll
-    int remainder = words % unroll;
-    lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));
+       const int unroll = 8; // Number of str(zr) instructions we'll unroll
+       int remainder = words % unroll;
+       lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));
 
-    Label entry_point, loop;
-    b(entry_point);
+       Label entry_point, loop;
+       b(entry_point);
 
-    bind(loop);
-    sub(index, index, 1);
-    for (int i = -unroll; i < 0; i++) {
-      if (-i == remainder)
-        bind(entry_point);
-      str(zr, Address(rscratch1, i * wordSize));
-    }
-    if (remainder == 0)
-      bind(entry_point);
-    add(rscratch1, rscratch1, unroll * wordSize);
-    cbnz(index, loop);
+       bind(loop);
+       sub(index, index, 1);
+       for (int i = -unroll; i < 0; i++) {
+         if (-i == remainder)
+           bind(entry_point);
+         str(zr, Address(rscratch1, i * wordSize));
+       }
+       if (remainder == 0)
+         bind(entry_point);
+       add(rscratch1, rscratch1, unroll * wordSize);
+       cbnz(index, loop);
 
+     }
   }
 
   membar(StoreStore);
--- a/hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp	Mon Dec 19 08:31:01 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp	Mon Dec 19 02:33:30 2016 -0800
@@ -36,7 +36,6 @@
   // initialization
   void pd_init() { _rsp_offset = 0; }
 
-void zero_memory(Register addr, Register len, Register t1);
 
  public:
   void try_allocate(
@@ -75,7 +74,8 @@
     Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
     int      con_size_in_bytes,        // object size in bytes if   known at compile time
     Register t1,                       // temp register
-    Register t2                        // temp register
+    Register t2,                        // temp register
+    bool     is_tlab_allocated         // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
   );
 
   // allocation of fixed-size objects
--- a/hotspot/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Mon Dec 19 08:31:01 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Mon Dec 19 02:33:30 2016 -0800
@@ -728,7 +728,7 @@
 
           __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
 
-          __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+          __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
           __ verify_oop(obj);
           __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
           __ ret(lr);
@@ -740,7 +740,7 @@
           __ eden_allocate(obj, obj_size, 0, t1, slow_path);
           __ incr_allocated_bytes(rthread, obj_size, 0, rscratch1);
 
-          __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+          __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false);
           __ verify_oop(obj);
           __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
           __ ret(lr);
@@ -853,7 +853,9 @@
           __ andr(t1, t1, Klass::_lh_header_size_mask);
           __ sub(arr_size, arr_size, t1);  // body length
           __ add(t1, t1, obj);       // body start
-          __ initialize_body(t1, arr_size, 0, t2);
+          if (!ZeroTLAB) {
+           __ initialize_body(t1, arr_size, 0, t2);
+          }
           __ verify_oop(obj);
 
           __ ret(lr);
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Mon Dec 19 08:31:01 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Mon Dec 19 02:33:30 2016 -0800
@@ -3944,12 +3944,82 @@
   add(top, top, t1);
   sub(top, top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
   str(top, Address(rthread, in_bytes(JavaThread::tlab_end_offset())));
+
+  if (ZeroTLAB) {
+    // This is a fast TLAB refill, therefore the GC is not notified of it.
+    // So compiled code must fill the new TLAB with zeroes.
+    ldr(top, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
+    zero_memory(top,t1,t2);
+  }
+
   verify_tlab();
   b(retry);
 
   return rthread; // for use by caller
 }
 
+// Emit code that stores zero to each word of [addr, addr + len); len is in bytes.
+// Preserves addr; overwrites len, t1, rscratch1 and rscratch2.
+// len must be a nonzero multiple of wordSize (the multiple is checked under ASSERT)
+void MacroAssembler::zero_memory(Register addr, Register len, Register t1) {
+  assert_different_registers(addr, len, t1, rscratch1, rscratch2);
+
+#ifdef ASSERT
+  { Label L;
+    tst(len, BytesPerWord - 1);
+    br(Assembler::EQ, L);
+    stop("len is not a multiple of BytesPerWord");
+    bind(L);
+  }
+#endif
+
+#ifndef PRODUCT
+  block_comment("zero memory");
+#endif
+
+  Label loop;
+  Label entry;
+
+//  Algorithm (cnt = len converted to words, p = t1, scratch1 = rscratch1):
+//
+//    scratch1 = cnt & 7;
+//    cnt -= scratch1;
+//    p += scratch1;
+//    switch (scratch1) {
+//      do {
+//        cnt -= 8;
+//          p[-8] = 0;
+//        case 7:
+//          p[-7] = 0;
+//        case 6:
+//          p[-6] = 0;
+//          // ...
+//        case 1:
+//          p[-1] = 0;
+//        case 0:
+//          p += 8;
+//      } while (cnt);
+//    }
+
+  const int unroll = 8; // Number of str(zr) instructions we'll unroll
+
+  lsr(len, len, LogBytesPerWord);    // len: byte count -> word count (cnt)
+  andr(rscratch1, len, unroll - 1);  // rscratch1 = cnt % unroll
+  sub(len, len, rscratch1);      // cnt -= cnt % unroll (now a multiple of unroll)
+  // t1 always points to the end of the region we're about to zero
+  add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord);
+  adr(rscratch2, entry);         // address of the instruction after the last str
+  sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2); // back up 4 bytes per leftover word; assumes each str below is one 4-byte instruction
+  br(rscratch2);                 // enter the unrolled stores part-way through
+  bind(loop);
+  sub(len, len, unroll);
+  for (int i = -unroll; i < 0; i++)
+    str(zr, Address(t1, i * wordSize));
+  bind(entry);
+  add(t1, t1, unroll * wordSize);
+  cbnz(len, loop);               // repeat while full groups of unroll words remain
+}
+
 // Defines obj, preserves var_size_in_bytes
 void MacroAssembler::eden_allocate(Register obj,
                                    Register var_size_in_bytes,
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Mon Dec 19 08:31:01 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Mon Dec 19 02:33:30 2016 -0800
@@ -857,6 +857,7 @@
     Label&   slow_case                 // continuation point if fast allocation fails
   );
   Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
+  void zero_memory(Register addr, Register len, Register t1);
   void verify_tlab();
 
   void incr_allocated_bytes(Register thread,