8169177: AArch64: SIGSEGV when "-XX:+ZeroTLAB" is specified along with GC options
Reviewed-by: aph
Contributed-by: kavitha.natarajan@linaro.org
--- a/hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Mon Dec 19 08:31:01 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Mon Dec 19 02:33:30 2016 -0800
@@ -195,95 +195,22 @@
}
}
-// Zero words; len is in bytes
-// Destroys all registers except addr
-// len must be a nonzero multiple of wordSize
-void C1_MacroAssembler::zero_memory(Register addr, Register len, Register t1) {
- assert_different_registers(addr, len, t1, rscratch1, rscratch2);
-
-#ifdef ASSERT
- { Label L;
- tst(len, BytesPerWord - 1);
- br(Assembler::EQ, L);
- stop("len is not a multiple of BytesPerWord");
- bind(L);
- }
-#endif
-
-#ifndef PRODUCT
- block_comment("zero memory");
-#endif
-
- Label loop;
- Label entry;
-
-// Algorithm:
-//
-// scratch1 = cnt & 7;
-// cnt -= scratch1;
-// p += scratch1;
-// switch (scratch1) {
-// do {
-// cnt -= 8;
-// p[-8] = 0;
-// case 7:
-// p[-7] = 0;
-// case 6:
-// p[-6] = 0;
-// // ...
-// case 1:
-// p[-1] = 0;
-// case 0:
-// p += 8;
-// } while (cnt);
-// }
-
- const int unroll = 8; // Number of str(zr) instructions we'll unroll
-
- lsr(len, len, LogBytesPerWord);
- andr(rscratch1, len, unroll - 1); // tmp1 = cnt % unroll
- sub(len, len, rscratch1); // cnt -= unroll
- // t1 always points to the end of the region we're about to zero
- add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord);
- adr(rscratch2, entry);
- sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
- br(rscratch2);
- bind(loop);
- sub(len, len, unroll);
- for (int i = -unroll; i < 0; i++)
- str(zr, Address(t1, i * wordSize));
- bind(entry);
- add(t1, t1, unroll * wordSize);
- cbnz(len, loop);
-}
-
// preserves obj, destroys len_in_bytes
void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
+ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
Label done;
- assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different");
- assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord");
- Register index = len_in_bytes;
- // index is positive and ptr sized
- subs(index, index, hdr_size_in_bytes);
+
+ // len_in_bytes is positive and ptr sized
+ subs(len_in_bytes, len_in_bytes, hdr_size_in_bytes);
br(Assembler::EQ, done);
- // note: for the remaining code to work, index must be a multiple of BytesPerWord
-#ifdef ASSERT
- { Label L;
- tst(index, BytesPerWord - 1);
- br(Assembler::EQ, L);
- stop("index is not a multiple of BytesPerWord");
- bind(L);
- }
-#endif
// Preserve obj
if (hdr_size_in_bytes)
add(obj, obj, hdr_size_in_bytes);
- zero_memory(obj, index, t1);
+ zero_memory(obj, len_in_bytes, t1);
if (hdr_size_in_bytes)
sub(obj, obj, hdr_size_in_bytes);
- // done
bind(done);
}
@@ -294,57 +221,59 @@
try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
- initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2);
+ initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
}
-void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) {
+void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
"con_size_in_bytes is not multiple of alignment");
const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
initialize_header(obj, klass, noreg, t1, t2);
- // clear rest of allocated space
- const Register index = t2;
- const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below)
- if (var_size_in_bytes != noreg) {
- mov(index, var_size_in_bytes);
- initialize_body(obj, index, hdr_size_in_bytes, t1);
- } else if (con_size_in_bytes <= threshold) {
- // use explicit null stores
- int i = hdr_size_in_bytes;
- if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) {
- str(zr, Address(obj, i));
- i += BytesPerWord;
- }
- for (; i < con_size_in_bytes; i += 2 * BytesPerWord)
- stp(zr, zr, Address(obj, i));
- } else if (con_size_in_bytes > hdr_size_in_bytes) {
- block_comment("zero memory");
- // use loop to null out the fields
+ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
+ // clear rest of allocated space
+ const Register index = t2;
+ const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below)
+ if (var_size_in_bytes != noreg) {
+ mov(index, var_size_in_bytes);
+ initialize_body(obj, index, hdr_size_in_bytes, t1);
+ } else if (con_size_in_bytes <= threshold) {
+ // use explicit null stores
+ int i = hdr_size_in_bytes;
+ if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) {
+ str(zr, Address(obj, i));
+ i += BytesPerWord;
+ }
+ for (; i < con_size_in_bytes; i += 2 * BytesPerWord)
+ stp(zr, zr, Address(obj, i));
+ } else if (con_size_in_bytes > hdr_size_in_bytes) {
+ block_comment("zero memory");
+ // use loop to null out the fields
- int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
- mov(index, words / 8);
+ int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
+ mov(index, words / 8);
- const int unroll = 8; // Number of str(zr) instructions we'll unroll
- int remainder = words % unroll;
- lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));
+ const int unroll = 8; // Number of str(zr) instructions we'll unroll
+ int remainder = words % unroll;
+ lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));
- Label entry_point, loop;
- b(entry_point);
+ Label entry_point, loop;
+ b(entry_point);
- bind(loop);
- sub(index, index, 1);
- for (int i = -unroll; i < 0; i++) {
- if (-i == remainder)
- bind(entry_point);
- str(zr, Address(rscratch1, i * wordSize));
- }
- if (remainder == 0)
- bind(entry_point);
- add(rscratch1, rscratch1, unroll * wordSize);
- cbnz(index, loop);
+ bind(loop);
+ sub(index, index, 1);
+ for (int i = -unroll; i < 0; i++) {
+ if (-i == remainder)
+ bind(entry_point);
+ str(zr, Address(rscratch1, i * wordSize));
+ }
+ if (remainder == 0)
+ bind(entry_point);
+ add(rscratch1, rscratch1, unroll * wordSize);
+ cbnz(index, loop);
+ }
}
membar(StoreStore);
--- a/hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp Mon Dec 19 08:31:01 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp Mon Dec 19 02:33:30 2016 -0800
@@ -36,7 +36,6 @@
// initialization
void pd_init() { _rsp_offset = 0; }
-void zero_memory(Register addr, Register len, Register t1);
public:
void try_allocate(
@@ -75,7 +74,8 @@
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
int con_size_in_bytes, // object size in bytes if known at compile time
Register t1, // temp register
- Register t2 // temp register
+ Register t2, // temp register
+ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
);
// allocation of fixed-size objects
--- a/hotspot/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Mon Dec 19 08:31:01 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Mon Dec 19 02:33:30 2016 -0800
@@ -728,7 +728,7 @@
__ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
- __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
__ verify_oop(obj);
__ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
__ ret(lr);
@@ -740,7 +740,7 @@
__ eden_allocate(obj, obj_size, 0, t1, slow_path);
__ incr_allocated_bytes(rthread, obj_size, 0, rscratch1);
- __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false);
__ verify_oop(obj);
__ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
__ ret(lr);
@@ -853,7 +853,9 @@
__ andr(t1, t1, Klass::_lh_header_size_mask);
__ sub(arr_size, arr_size, t1); // body length
__ add(t1, t1, obj); // body start
- __ initialize_body(t1, arr_size, 0, t2);
+ if (!ZeroTLAB) {
+ __ initialize_body(t1, arr_size, 0, t2);
+ }
__ verify_oop(obj);
__ ret(lr);
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon Dec 19 08:31:01 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon Dec 19 02:33:30 2016 -0800
@@ -3944,12 +3944,82 @@
add(top, top, t1);
sub(top, top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
str(top, Address(rthread, in_bytes(JavaThread::tlab_end_offset())));
+
+ if (ZeroTLAB) {
+ // This is a fast TLAB refill, therefore the GC is not notified of it.
+ // So compiled code must fill the new TLAB with zeroes.
+ ldr(top, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
+ zero_memory(top,t1,t2);
+ }
+
verify_tlab();
b(retry);
return rthread; // for use by caller
}
+// Zero the len bytes of memory starting at addr, one word at a time.
+// Preserves addr; clobbers len, t1, rscratch1 and rscratch2.
+// len must be a nonzero multiple of wordSize (word multiple is checked under ASSERT)
+void MacroAssembler::zero_memory(Register addr, Register len, Register t1) {
+ assert_different_registers(addr, len, t1, rscratch1, rscratch2);
+
+#ifdef ASSERT
+ { Label L;
+ tst(len, BytesPerWord - 1);
+ br(Assembler::EQ, L);
+ stop("len is not a multiple of BytesPerWord");
+ bind(L);
+ }
+#endif
+
+#ifndef PRODUCT
+ block_comment("zero memory");
+#endif
+
+ Label loop;
+ Label entry;
+
+// Algorithm (cnt is len in words, p is t1, scratch1 is rscratch1):
+//
+// scratch1 = cnt & 7;
+// cnt -= scratch1;
+// p += scratch1;
+// switch (scratch1) {
+// do {
+// cnt -= 8;
+// p[-8] = 0;
+// case 7:
+// p[-7] = 0;
+// case 6:
+// p[-6] = 0;
+// // ...
+// case 1:
+// p[-1] = 0;
+// case 0:
+// p += 8;
+// } while (cnt);
+// }
+
+ const int unroll = 8; // Number of str(zr) instructions we'll unroll
+
+ lsr(len, len, LogBytesPerWord); // len: byte count -> word count (cnt)
+ andr(rscratch1, len, unroll - 1); // rscratch1 = cnt % unroll
+ sub(len, len, rscratch1); // cnt -= cnt % unroll
+ // t1 always points to the end of the region we're about to zero
+ add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord);
+ adr(rscratch2, entry);
+ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2); // step back 4 bytes (one str) per leftover word
+ br(rscratch2); // computed jump into the unrolled stores below
+ bind(loop);
+ sub(len, len, unroll);
+ for (int i = -unroll; i < 0; i++)
+ str(zr, Address(t1, i * wordSize));
+ bind(entry);
+ add(t1, t1, unroll * wordSize);
+ cbnz(len, loop);
+}
+
// Defines obj, preserves var_size_in_bytes
void MacroAssembler::eden_allocate(Register obj,
Register var_size_in_bytes,
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Mon Dec 19 08:31:01 2016 +0100
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Mon Dec 19 02:33:30 2016 -0800
@@ -857,6 +857,7 @@
Label& slow_case // continuation point if fast allocation fails
);
Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
+ void zero_memory(Register addr, Register len, Register t1);
void verify_tlab();
void incr_allocated_bytes(Register thread,