8086053: Address inconsistencies regarding ZeroTLAB
Summary: Add zero-initialization to C1 for fast TLAB refills; strengthen C2 conditions for skipping zero-initialization.
Reviewed-by: kvn, thartmann
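
A rough sketch of the invariant this change enforces (illustrative C++ only, not HotSpot code; the helper name is hypothetical): zero-initialization of a new object body may be skipped only when TLABs are enabled, TLABs are pre-zeroed, and the object was actually allocated in a TLAB. ZeroTLAB on its own says nothing about eden allocations, which is why the C1 stubs now pass an is_tlab_allocated flag and the C2 checks test UseTLAB as well.

    #include <cassert>

    // Hypothetical helper, for illustration only: the predicate that now guards
    // every "skip zero-initialization" path in C1 and C2.
    static bool can_skip_zeroing(bool use_tlab, bool zero_tlab, bool is_tlab_allocated) {
        return use_tlab && zero_tlab && is_tlab_allocated;
    }

    int main() {
        assert(!can_skip_zeroing(false, true,  false)); // eden allocation: must zero even with +ZeroTLAB
        assert(!can_skip_zeroing(true,  false, true));  // TLAB allocation, but TLABs are not pre-zeroed
        assert( can_skip_zeroing(true,  true,  true));  // pre-zeroed TLAB: object body is already zero
        return 0;
    }
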
--- a/hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp Tue Jan 12 09:19:09 2016 +0100
@@ -205,12 +205,7 @@
void C1_MacroAssembler::initialize_body(Register base, Register index) {
- assert_different_registers(base, index);
- Label loop;
- bind(loop);
- subcc(index, HeapWordSize, index);
- brx(Assembler::greaterEqual, true, Assembler::pt, loop);
- delayed()->st_ptr(G0, base, index);
+ zero_memory(base, index);
}
@@ -237,7 +232,7 @@
}
try_allocate(obj, noreg, obj_size * wordSize, t2, t3, slow_case);
- initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2);
+ initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2, /* is_tlab_allocated */ UseTLAB);
}
void C1_MacroAssembler::initialize_object(
@@ -246,7 +241,8 @@
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
int con_size_in_bytes, // object size in bytes if known at compile time
Register t1, // temp register
- Register t2 // temp register
+ Register t2, // temp register
+ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
) {
const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
@@ -269,31 +265,33 @@
#endif
- // initialize body
- const int threshold = 5 * HeapWordSize; // approximate break even point for code size
- if (var_size_in_bytes != noreg) {
- // use a loop
- add(obj, hdr_size_in_bytes, t1); // compute address of first element
- sub(var_size_in_bytes, hdr_size_in_bytes, t2); // compute size of body
- initialize_body(t1, t2);
+ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
+ // initialize body
+ const int threshold = 5 * HeapWordSize; // approximate break even point for code size
+ if (var_size_in_bytes != noreg) {
+ // use a loop
+ add(obj, hdr_size_in_bytes, t1); // compute address of first element
+ sub(var_size_in_bytes, hdr_size_in_bytes, t2); // compute size of body
+ initialize_body(t1, t2);
#ifndef _LP64
- } else if (con_size_in_bytes < threshold * 2) {
- // on v9 we can do double word stores to fill twice as much space.
- assert(hdr_size_in_bytes % 8 == 0, "double word aligned");
- assert(con_size_in_bytes % 8 == 0, "double word aligned");
- for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += 2 * HeapWordSize) stx(G0, obj, i);
+ } else if (con_size_in_bytes < threshold * 2) {
+ // on v9 we can do double word stores to fill twice as much space.
+ assert(hdr_size_in_bytes % 8 == 0, "double word aligned");
+ assert(con_size_in_bytes % 8 == 0, "double word aligned");
+ for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += 2 * HeapWordSize) stx(G0, obj, i);
#endif
- } else if (con_size_in_bytes <= threshold) {
- // use explicit NULL stores
- for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += HeapWordSize) st_ptr(G0, obj, i);
- } else if (con_size_in_bytes > hdr_size_in_bytes) {
- // use a loop
- const Register base = t1;
- const Register index = t2;
- add(obj, hdr_size_in_bytes, base); // compute address of first element
- // compute index = number of words to clear
- set(con_size_in_bytes - hdr_size_in_bytes, index);
- initialize_body(base, index);
+ } else if (con_size_in_bytes <= threshold) {
+ // use explicit NULL stores
+ for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += HeapWordSize) st_ptr(G0, obj, i);
+ } else if (con_size_in_bytes > hdr_size_in_bytes) {
+ // use a loop
+ const Register base = t1;
+ const Register index = t2;
+ add(obj, hdr_size_in_bytes, base); // compute address of first element
+ // compute index = number of words to clear
+ set(con_size_in_bytes - hdr_size_in_bytes, index);
+ initialize_body(base, index);
+ }
}
if (CURRENT_ENV->dtrace_alloc_probes()) {
--- a/hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.hpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.hpp Tue Jan 12 09:19:09 2016 +0100
@@ -50,7 +50,8 @@
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
int con_size_in_bytes, // object size in bytes if known at compile time
Register t1, // temp register
- Register t2 // temp register
+ Register t2, // temp register
+ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
);
// allocation of fixed-size objects
--- a/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Tue Jan 12 09:19:09 2016 +0100
@@ -435,7 +435,7 @@
__ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path);
- __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
+ __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2, /* is_tlab_allocated */ true);
__ verify_oop(O0_obj);
__ mov(O0, I0);
__ ret();
@@ -447,7 +447,7 @@
__ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path);
__ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2);
- __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
+ __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2, /* is_tlab_allocated */ false);
__ verify_oop(O0_obj);
__ mov(O0, I0);
__ ret();
@@ -542,7 +542,9 @@
__ ldub(klass_lh, G3_t1, klass_lh_header_size_offset);
__ sub(G1_arr_size, G3_t1, O1_t2); // body length
__ add(O0_obj, G3_t1, G3_t1); // body start
- __ initialize_body(G3_t1, O1_t2);
+ if (!ZeroTLAB) {
+ __ initialize_body(G3_t1, O1_t2);
+ }
__ verify_oop(O0_obj);
__ retl();
__ delayed()->nop();
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Tue Jan 12 09:19:09 2016 +0100
@@ -3459,11 +3459,27 @@
add(top, t1, top); // t1 is tlab_size
sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
+
+ if (ZeroTLAB) {
+ // This is a fast TLAB refill, therefore the GC is not notified of it.
+ // So compiled code must fill the new TLAB with zeroes.
+ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
+ zero_memory(t2, t1);
+ }
verify_tlab();
ba(retry);
delayed()->nop();
}
+void MacroAssembler::zero_memory(Register base, Register index) {
+ assert_different_registers(base, index);
+ Label loop;
+ bind(loop);
+ subcc(index, HeapWordSize, index);
+ brx(Assembler::greaterEqual, true, Assembler::pt, loop);
+ delayed()->st_ptr(G0, base, index);
+}
+
void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
Register t1, Register t2) {
// Bump total bytes allocated by this thread
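
For orientation, a minimal C++ sketch of what the new ZeroTLAB branch in tlab_refill amounts to (names such as refill_and_zero, tlab_start, and tlab_size_in_bytes are hypothetical; the real code emits SPARC instructions, loads the TLAB start from JavaThread, and calls MacroAssembler::zero_memory):

    #include <cstring>
    #include <cstddef>

    // Sketch only: a fast TLAB refill bypasses the GC, so when ZeroTLAB is set
    // the compiled stub itself must restore the "buffer is all zeroes" invariant.
    void refill_and_zero(void* tlab_start, size_t tlab_size_in_bytes, bool zero_tlab) {
        if (zero_tlab) {
            std::memset(tlab_start, 0, tlab_size_in_bytes);
        }
    }
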
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp Tue Jan 12 09:19:09 2016 +0100
@@ -1278,6 +1278,7 @@
Label& slow_case // continuation point if fast allocation fails
);
void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
+ void zero_memory(Register base, Register index);
void incr_allocated_bytes(RegisterOrConstant size_in_bytes,
Register t1, Register t2);
--- a/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Tue Jan 12 09:19:09 2016 +0100
@@ -182,54 +182,13 @@
// preserves obj, destroys len_in_bytes
void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
+ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
Label done;
- assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different");
- assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord");
- Register index = len_in_bytes;
- // index is positive and ptr sized
- subptr(index, hdr_size_in_bytes);
+
+ // len_in_bytes is positive and ptr sized
+ subptr(len_in_bytes, hdr_size_in_bytes);
jcc(Assembler::zero, done);
- // initialize topmost word, divide index by 2, check if odd and test if zero
- // note: for the remaining code to work, index must be a multiple of BytesPerWord
-#ifdef ASSERT
- { Label L;
- testptr(index, BytesPerWord - 1);
- jcc(Assembler::zero, L);
- stop("index is not a multiple of BytesPerWord");
- bind(L);
- }
-#endif
- xorptr(t1, t1); // use _zero reg to clear memory (shorter code)
- if (UseIncDec) {
- shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set
- } else {
- shrptr(index, 2); // use 2 instructions to avoid partial flag stall
- shrptr(index, 1);
- }
-#ifndef _LP64
- // index could have been not a multiple of 8 (i.e., bit 2 was set)
- { Label even;
- // note: if index was a multiple of 8, than it cannot
- // be 0 now otherwise it must have been 0 before
- // => if it is even, we don't need to check for 0 again
- jcc(Assembler::carryClear, even);
- // clear topmost word (no jump needed if conditional assignment would work here)
- movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 0*BytesPerWord), t1);
- // index could be 0 now, need to check again
- jcc(Assembler::zero, done);
- bind(even);
- }
-#endif // !_LP64
- // initialize remaining object fields: rdx is a multiple of 2 now
- { Label loop;
- bind(loop);
- movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 1*BytesPerWord), t1);
- NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 2*BytesPerWord), t1);)
- decrement(index);
- jcc(Assembler::notZero, loop);
- }
-
- // done
+ zero_memory(obj, len_in_bytes, hdr_size_in_bytes, t1);
bind(done);
}
@@ -241,47 +200,49 @@
try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
- initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2);
+ initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
}
-void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) {
+void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
"con_size_in_bytes is not multiple of alignment");
const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
initialize_header(obj, klass, noreg, t1, t2);
- // clear rest of allocated space
- const Register t1_zero = t1;
- const Register index = t2;
- const int threshold = 6 * BytesPerWord; // approximate break even point for code size (see comments below)
- if (var_size_in_bytes != noreg) {
- mov(index, var_size_in_bytes);
- initialize_body(obj, index, hdr_size_in_bytes, t1_zero);
- } else if (con_size_in_bytes <= threshold) {
- // use explicit null stores
- // code size = 2 + 3*n bytes (n = number of fields to clear)
- xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
- for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord)
- movptr(Address(obj, i), t1_zero);
- } else if (con_size_in_bytes > hdr_size_in_bytes) {
- // use loop to null out the fields
- // code size = 16 bytes for even n (n = number of fields to clear)
- // initialize last object field first if odd number of fields
- xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
- movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3);
- // initialize last object field if constant size is odd
- if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0)
- movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero);
- // initialize remaining object fields: rdx is a multiple of 2
- { Label loop;
- bind(loop);
- movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)),
- t1_zero);
- NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)),
- t1_zero);)
- decrement(index);
- jcc(Assembler::notZero, loop);
+ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
+ // clear rest of allocated space
+ const Register t1_zero = t1;
+ const Register index = t2;
+ const int threshold = 6 * BytesPerWord; // approximate break even point for code size (see comments below)
+ if (var_size_in_bytes != noreg) {
+ mov(index, var_size_in_bytes);
+ initialize_body(obj, index, hdr_size_in_bytes, t1_zero);
+ } else if (con_size_in_bytes <= threshold) {
+ // use explicit null stores
+ // code size = 2 + 3*n bytes (n = number of fields to clear)
+ xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
+ for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord)
+ movptr(Address(obj, i), t1_zero);
+ } else if (con_size_in_bytes > hdr_size_in_bytes) {
+ // use loop to null out the fields
+ // code size = 16 bytes for even n (n = number of fields to clear)
+ // initialize last object field first if odd number of fields
+ xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code)
+ movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3);
+ // initialize last object field if constant size is odd
+ if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0)
+ movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero);
+ // initialize remaining object fields: rdx is a multiple of 2
+ { Label loop;
+ bind(loop);
+ movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)),
+ t1_zero);
+ NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)),
+ t1_zero);)
+ decrement(index);
+ jcc(Assembler::notZero, loop);
+ }
}
}
--- a/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.hpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.hpp Tue Jan 12 09:19:09 2016 +0100
@@ -65,7 +65,8 @@
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
int con_size_in_bytes, // object size in bytes if known at compile time
Register t1, // temp register
- Register t2 // temp register
+ Register t2, // temp register
+ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
);
// allocation of fixed-size objects
--- a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp Tue Jan 12 09:19:09 2016 +0100
@@ -1040,7 +1040,7 @@
__ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
- __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
__ verify_oop(obj);
__ pop(rbx);
__ pop(rdi);
@@ -1053,7 +1053,7 @@
__ eden_allocate(obj, obj_size, 0, t1, slow_path);
__ incr_allocated_bytes(thread, obj_size, 0);
- __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false);
__ verify_oop(obj);
__ pop(rbx);
__ pop(rdi);
@@ -1169,7 +1169,9 @@
__ andptr(t1, Klass::_lh_header_size_mask);
__ subptr(arr_size, t1); // body length
__ addptr(t1, obj); // body start
- __ initialize_body(t1, arr_size, 0, t2);
+ if (!ZeroTLAB) {
+ __ initialize_body(t1, arr_size, 0, t2);
+ }
__ verify_oop(obj);
__ ret(0);
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jan 12 09:19:09 2016 +0100
@@ -5426,7 +5426,7 @@
Label& try_eden,
Label& slow_case) {
Register top = rax;
- Register t1 = rcx;
+ Register t1 = rcx; // object size
Register t2 = rsi;
Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
@@ -5522,12 +5522,76 @@
addptr(top, t1);
subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
+
+ if (ZeroTLAB) {
+ // This is a fast TLAB refill, therefore the GC is not notified of it.
+ // So compiled code must fill the new TLAB with zeroes.
+ movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
+ zero_memory(top, t1, 0, t2);
+ }
+
verify_tlab();
jmp(retry);
return thread_reg; // for use by caller
}
+// Preserves the contents of address, destroys the contents length_in_bytes and temp.
+void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) {
+ assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different");
+ assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord");
+ Label done;
+
+ testptr(length_in_bytes, length_in_bytes);
+ jcc(Assembler::zero, done);
+
+ // initialize topmost word, divide index by 2, check if odd and test if zero
+ // note: for the remaining code to work, index must be a multiple of BytesPerWord
+#ifdef ASSERT
+ {
+ Label L;
+ testptr(length_in_bytes, BytesPerWord - 1);
+ jcc(Assembler::zero, L);
+ stop("length must be a multiple of BytesPerWord");
+ bind(L);
+ }
+#endif
+ Register index = length_in_bytes;
+ xorptr(temp, temp); // use _zero reg to clear memory (shorter code)
+ if (UseIncDec) {
+ shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set
+ } else {
+ shrptr(index, 2); // use 2 instructions to avoid partial flag stall
+ shrptr(index, 1);
+ }
+#ifndef _LP64
+ // index could have not been a multiple of 8 (i.e., bit 2 was set)
+ {
+ Label even;
+ // note: if index was a multiple of 8, then it cannot
+ // be 0 now otherwise it must have been 0 before
+ // => if it is even, we don't need to check for 0 again
+ jcc(Assembler::carryClear, even);
+ // clear topmost word (no jump would be needed if conditional assignment worked here)
+ movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp);
+ // index could be 0 now, must check again
+ jcc(Assembler::zero, done);
+ bind(even);
+ }
+#endif // !_LP64
+ // initialize remaining object fields: index is a multiple of 2 now
+ {
+ Label loop;
+ bind(loop);
+ movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
+ NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);)
+ decrement(index);
+ jcc(Assembler::notZero, loop);
+ }
+
+ bind(done);
+}
+
void MacroAssembler::incr_allocated_bytes(Register thread,
Register var_size_in_bytes,
int con_size_in_bytes,
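
A plain C++ approximation of what the emitted x86 zero_memory code does (illustrative only; the generated code uses the shifted index shown above and stores word pairs on 32-bit): clear length_in_bytes bytes starting at address + offset_in_bytes, walking downwards one machine word at a time, with both length and offset assumed to be word-aligned.

    #include <cstddef>
    #include <cstdint>

    // Sketch of the zeroing loop; not the actual emitted assembly.
    void zero_memory_sketch(uintptr_t* address, size_t length_in_bytes, size_t offset_in_bytes) {
        uintptr_t* base = address + offset_in_bytes / sizeof(uintptr_t); // first word to clear
        size_t words = length_in_bytes / sizeof(uintptr_t);              // number of words to clear
        while (words > 0) {
            base[--words] = 0; // store zero into the topmost remaining word
        }
    }
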
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jan 12 09:19:09 2016 +0100
@@ -522,6 +522,8 @@
Label& slow_case // continuation point if fast allocation fails
);
Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
+ void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp);
+
void incr_allocated_bytes(Register thread,
Register var_size_in_bytes, int con_size_in_bytes,
Register t1 = noreg);
--- a/hotspot/src/share/vm/gc/shared/threadLocalAllocBuffer.cpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/share/vm/gc/shared/threadLocalAllocBuffer.cpp Tue Jan 12 09:19:09 2016 +0100
@@ -105,7 +105,7 @@
// an illusion of a contiguous Eden and optionally retires the tlab.
// Waste accounting should be done in caller as appropriate; see,
// for example, clear_before_allocation().
-void ThreadLocalAllocBuffer::make_parsable(bool retire) {
+void ThreadLocalAllocBuffer::make_parsable(bool retire, bool zap) {
if (end() != NULL) {
invariants();
@@ -113,7 +113,7 @@
myThread()->incr_allocated_bytes(used_bytes());
}
- CollectedHeap::fill_with_object(top(), hard_end(), retire);
+ CollectedHeap::fill_with_object(top(), hard_end(), retire && zap);
if (retire || ZeroTLAB) { // "Reset" the TLAB
set_start(NULL);
--- a/hotspot/src/share/vm/gc/shared/threadLocalAllocBuffer.hpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/share/vm/gc/shared/threadLocalAllocBuffer.hpp Tue Jan 12 09:19:09 2016 +0100
@@ -145,8 +145,8 @@
// Initialization at startup
static void startup_initialization();
- // Make an in-use tlab parsable, optionally also retiring it.
- void make_parsable(bool retire);
+ // Make an in-use tlab parsable, optionally retiring and/or zapping it.
+ void make_parsable(bool retire, bool zap = true);
// Retire in-use tlab before allocation of a new tlab
void clear_before_allocation();
--- a/hotspot/src/share/vm/opto/library_call.cpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/share/vm/opto/library_call.cpp Tue Jan 12 09:19:09 2016 +0100
@@ -3077,7 +3077,7 @@
set_control( _gvn.transform(new IfTrueNode(iff_arg)));
#else
// To return true on Windows you must read the _interrupted field
- // and check the the event state i.e. take the slow path.
+ // and check the event state i.e. take the slow path.
#endif // TARGET_OS_FAMILY_windows
// (d) Otherwise, go to the slow path.
--- a/hotspot/src/share/vm/opto/macro.cpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/share/vm/opto/macro.cpp Tue Jan 12 09:19:09 2016 +0100
@@ -1813,10 +1813,11 @@
// there can be two Allocates to one Initialize. The answer in all these
// edge cases is safety first. It is always safe to clear immediately
// within an Allocate, and then (maybe or maybe not) clear some more later.
- if (!ZeroTLAB)
+ if (!(UseTLAB && ZeroTLAB)) {
rawmem = ClearArrayNode::clear_memory(control, rawmem, object,
header_size, size_in_bytes,
&_igvn);
+ }
} else {
if (!init->is_complete()) {
// Try to win by zeroing only what the init does not store.
--- a/hotspot/src/share/vm/opto/macroArrayCopy.cpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/share/vm/opto/macroArrayCopy.cpp Tue Jan 12 09:19:09 2016 +0100
@@ -295,7 +295,7 @@
// out-edges of the dest, we need to avoid making derived pointers
// from it until we have checked its uses.)
if (ReduceBulkZeroing
- && !ZeroTLAB // pointless if already zeroed
+ && !(UseTLAB && ZeroTLAB) // pointless if already zeroed
&& basic_elem_type != T_CONFLICT // avoid corner case
&& !src->eqv_uncast(dest)
&& alloc != NULL
--- a/hotspot/src/share/vm/opto/memnode.cpp Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/src/share/vm/opto/memnode.cpp Tue Jan 12 09:19:09 2016 +0100
@@ -3850,7 +3850,7 @@
bool do_zeroing = true; // we might give up if inits are very sparse
int big_init_gaps = 0; // how many large gaps have we seen?
- if (ZeroTLAB) do_zeroing = false;
+ if (UseTLAB && ZeroTLAB) do_zeroing = false;
if (!ReduceFieldZeroing && !ReduceBulkZeroing) do_zeroing = false;
for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) {
@@ -3951,7 +3951,7 @@
remove_extra_zeroes(); // clear out all the zmems left over
add_req(inits);
- if (!ZeroTLAB) {
+ if (!(UseTLAB && ZeroTLAB)) {
// If anything remains to be zeroed, zero it all now.
zeroes_done = align_size_down(zeroes_done, BytesPerInt);
// if it is the last unused 4 bytes of an instance, forget about it
--- a/hotspot/test/TEST.groups Mon Jan 11 14:23:35 2016 +0100
+++ b/hotspot/test/TEST.groups Tue Jan 12 09:19:09 2016 +0100
@@ -288,6 +288,7 @@
compiler/jsr292/ \
compiler/loopopts/ \
compiler/macronodes/ \
+ compiler/memoryinitialization/ \
compiler/osr/ \
compiler/regalloc/ \
compiler/runtime/ \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/memoryinitialization/ZeroTLABTest.java Tue Jan 12 09:19:09 2016 +0100
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 8086053
+ * @run main/othervm -Xcomp -XX:+UseTLAB -XX:+ZeroTLAB ZeroTLABTest
+ * @run main/othervm -Xcomp -XX:+UseTLAB -XX:-ZeroTLAB ZeroTLABTest
+ * @run main/othervm -Xcomp -XX:-UseTLAB -XX:+ZeroTLAB ZeroTLABTest
+ * @run main/othervm -Xcomp -XX:-UseTLAB -XX:-ZeroTLAB ZeroTLABTest
+ */
+public class ZeroTLABTest {
+ public static void main(String args[]) {
+ System.out.println("Test PASSED");
+ }
+}