8194084: Obsolete FastTLABRefill and remove the related code
author jcbeyler
Fri, 16 Feb 2018 14:23:30 -0800
changeset 49010 9010e596f391
parent 49009 1ecb986334cb
child 49011 a0e246b7403a
8194084: Obsolete FastTLABRefill and remove the related code
Summary: Remove all FastTLABRefill code and flag
Reviewed-by: mdoerr, drwhite, coleenp
Contributed-by: jcbeyler@google.com
src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp
src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
src/hotspot/cpu/arm/c1_Runtime1_arm.cpp
src/hotspot/cpu/arm/macroAssembler_arm.cpp
src/hotspot/cpu/arm/macroAssembler_arm.hpp
src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp
src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
src/hotspot/cpu/s390/c1_Runtime1_s390.cpp
src/hotspot/cpu/sparc/c1_Runtime1_sparc.cpp
src/hotspot/cpu/sparc/macroAssembler_sparc.cpp
src/hotspot/cpu/sparc/macroAssembler_sparc.hpp
src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
src/hotspot/cpu/x86/macroAssembler_x86.cpp
src/hotspot/cpu/x86/macroAssembler_x86.hpp
src/hotspot/share/gc/g1/g1Arguments.cpp
src/hotspot/share/runtime/arguments.cpp
src/hotspot/share/runtime/globals.hpp
--- a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -684,15 +684,13 @@
         }
 
         if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
-            UseTLAB && FastTLABRefill) {
+            UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
           Label slow_path;
           Register obj_size = r2;
           Register t1       = r19;
           Register t2       = r4;
           assert_different_registers(klass, obj, obj_size, t1, t2);
 
-          __ stp(r5, r19, Address(__ pre(sp, -2 * wordSize)));
-
           if (id == fast_new_instance_init_check_id) {
             // make sure the klass is initialized
             __ ldrb(rscratch1, Address(klass, InstanceKlass::init_state_offset()));
@@ -716,37 +714,21 @@
           }
 #endif // ASSERT
 
-          // if we got here then the TLAB allocation failed, so try
-          // refilling the TLAB or allocating directly from eden.
-          Label retry_tlab, try_eden;
-          __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy r3 (klass), returns r5
-
-          __ bind(retry_tlab);
-
           // get the instance size (size is postive so movl is fine for 64bit)
           __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset()));
 
-          __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
-
-          __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
-          __ verify_oop(obj);
-          __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
-          __ ret(lr);
-
-          __ bind(try_eden);
-          // get the instance size (size is postive so movl is fine for 64bit)
-          __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset()));
+          __ str(r19, Address(__ pre(sp, -wordSize)));
 
           __ eden_allocate(obj, obj_size, 0, t1, slow_path);
           __ incr_allocated_bytes(rthread, obj_size, 0, rscratch1);
 
           __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false);
           __ verify_oop(obj);
-          __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
+          __ ldr(r19, Address(__ post(sp, wordSize)));
           __ ret(lr);
 
           __ bind(slow_path);
-          __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
+          __ ldr(r19, Address(__ post(sp, wordSize)));
         }
 
         __ enter();
@@ -814,7 +796,7 @@
         }
 #endif // ASSERT
 
-        if (UseTLAB && FastTLABRefill) {
+        if (UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
           Register arr_size = r4;
           Register t1       = r2;
           Register t2       = r5;
@@ -826,45 +808,10 @@
           __ cmpw(length, rscratch1);
           __ br(Assembler::HI, slow_path);
 
-          // if we got here then the TLAB allocation failed, so try
-          // refilling the TLAB or allocating directly from eden.
-          Label retry_tlab, try_eden;
-          const Register thread =
-            __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves r19 & r3, returns rthread
-
-          __ bind(retry_tlab);
-
           // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
           // since size is positive ldrw does right thing on 64bit
           __ ldrw(t1, Address(klass, Klass::layout_helper_offset()));
-          __ lslvw(arr_size, length, t1);
-          __ ubfx(t1, t1, Klass::_lh_header_size_shift,
-                  exact_log2(Klass::_lh_header_size_mask + 1));
-          __ add(arr_size, arr_size, t1);
-          __ add(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up
-          __ andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask);
-
-          __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path);  // preserves arr_size
-
-          __ initialize_header(obj, klass, length, t1, t2);
-          __ ldrb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
-          assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
-          assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
-          __ andr(t1, t1, Klass::_lh_header_size_mask);
-          __ sub(arr_size, arr_size, t1);  // body length
-          __ add(t1, t1, obj);       // body start
-          if (!ZeroTLAB) {
-           __ initialize_body(t1, arr_size, 0, t2);
-          }
-          __ verify_oop(obj);
-
-          __ ret(lr);
-
-          __ bind(try_eden);
-          // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
-          // since size is positive ldrw does right thing on 64bit
-          __ ldrw(t1, Address(klass, Klass::layout_helper_offset()));
-          // since size is postive movw does right thing on 64bit
+          // since size is positive movw does right thing on 64bit
           __ movw(arr_size, length);
           __ lslvw(arr_size, length, t1);
           __ ubfx(t1, t1, Klass::_lh_header_size_shift,
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -4096,131 +4096,6 @@
   // verify_tlab();
 }
 
-// Preserves r19, and r3.
-Register MacroAssembler::tlab_refill(Label& retry,
-                                     Label& try_eden,
-                                     Label& slow_case) {
-  Register top = r0;
-  Register t1  = r2;
-  Register t2  = r4;
-  assert_different_registers(top, rthread, t1, t2, /* preserve: */ r19, r3);
-  Label do_refill, discard_tlab;
-
-  if (!Universe::heap()->supports_inline_contig_alloc()) {
-    // No allocation in the shared eden.
-    b(slow_case);
-  }
-
-  ldr(top, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
-  ldr(t1,  Address(rthread, in_bytes(JavaThread::tlab_end_offset())));
-
-  // calculate amount of free space
-  sub(t1, t1, top);
-  lsr(t1, t1, LogHeapWordSize);
-
-  // Retain tlab and allocate object in shared space if
-  // the amount free in the tlab is too large to discard.
-
-  ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
-  cmp(t1, rscratch1);
-  br(Assembler::LE, discard_tlab);
-
-  // Retain
-  // ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
-  mov(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
-  add(rscratch1, rscratch1, t2);
-  str(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
-
-  if (TLABStats) {
-    // increment number of slow_allocations
-    addmw(Address(rthread, in_bytes(JavaThread::tlab_slow_allocations_offset())),
-         1, rscratch1);
-  }
-  b(try_eden);
-
-  bind(discard_tlab);
-  if (TLABStats) {
-    // increment number of refills
-    addmw(Address(rthread, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1,
-         rscratch1);
-    // accumulate wastage -- t1 is amount free in tlab
-    addmw(Address(rthread, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1,
-         rscratch1);
-  }
-
-  // if tlab is currently allocated (top or end != null) then
-  // fill [top, end + alignment_reserve) with array object
-  cbz(top, do_refill);
-
-  // set up the mark word
-  mov(rscratch1, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
-  str(rscratch1, Address(top, oopDesc::mark_offset_in_bytes()));
-  // set the length to the remaining space
-  sub(t1, t1, typeArrayOopDesc::header_size(T_INT));
-  add(t1, t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
-  lsl(t1, t1, log2_intptr(HeapWordSize/sizeof(jint)));
-  strw(t1, Address(top, arrayOopDesc::length_offset_in_bytes()));
-  // set klass to intArrayKlass
-  {
-    unsigned long offset;
-    // dubious reloc why not an oop reloc?
-    adrp(rscratch1, ExternalAddress((address)Universe::intArrayKlassObj_addr()),
-         offset);
-    ldr(t1, Address(rscratch1, offset));
-  }
-  // store klass last.  concurrent gcs assumes klass length is valid if
-  // klass field is not null.
-  store_klass(top, t1);
-
-  mov(t1, top);
-  ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
-  sub(t1, t1, rscratch1);
-  incr_allocated_bytes(rthread, t1, 0, rscratch1);
-
-  // refill the tlab with an eden allocation
-  bind(do_refill);
-  ldr(t1, Address(rthread, in_bytes(JavaThread::tlab_size_offset())));
-  lsl(t1, t1, LogHeapWordSize);
-  // allocate new tlab, address returned in top
-  eden_allocate(top, t1, 0, t2, slow_case);
-
-  // Check that t1 was preserved in eden_allocate.
-#ifdef ASSERT
-  if (UseTLAB) {
-    Label ok;
-    Register tsize = r4;
-    assert_different_registers(tsize, rthread, t1);
-    str(tsize, Address(pre(sp, -16)));
-    ldr(tsize, Address(rthread, in_bytes(JavaThread::tlab_size_offset())));
-    lsl(tsize, tsize, LogHeapWordSize);
-    cmp(t1, tsize);
-    br(Assembler::EQ, ok);
-    STOP("assert(t1 != tlab size)");
-    should_not_reach_here();
-
-    bind(ok);
-    ldr(tsize, Address(post(sp, 16)));
-  }
-#endif
-  str(top, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
-  str(top, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
-  add(top, top, t1);
-  sub(top, top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
-  str(top, Address(rthread, in_bytes(JavaThread::tlab_end_offset())));
-
-  if (ZeroTLAB) {
-    // This is a fast TLAB refill, therefore the GC is not notified of it.
-    // So compiled code must fill the new TLAB with zeroes.
-    ldr(top, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
-    zero_memory(top,t1,t2);
-  }
-
-  verify_tlab();
-  b(retry);
-
-  return rthread; // for use by caller
-}
-
 // Zero words; len is in bytes
 // Destroys all registers except addr
 // len must be a nonzero multiple of wordSize
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Fri Feb 16 14:23:30 2018 -0800
@@ -861,7 +861,6 @@
     Register t2,                       // temp register
     Label&   slow_case                 // continuation point if fast allocation fails
   );
-  Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
   void zero_memory(Register addr, Register len, Register t1);
   void verify_tlab();
 
--- a/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -722,10 +722,10 @@
         const Register result = R0;
         const Register klass  = R1;
 
-        if (UseTLAB && FastTLABRefill && id != new_instance_id) {
+        if (UseTLAB && Universe::heap()->supports_inline_contig_alloc() && id != new_instance_id) {
           // We come here when TLAB allocation failed.
-          // In this case we either refill TLAB or allocate directly from eden.
-          Label retry_tlab, try_eden, slow_case, slow_case_no_pop;
+          // In this case we try to allocate directly from eden.
+          Label slow_case, slow_case_no_pop;
 
           // Make sure the class is fully initialized
           if (id == fast_new_instance_init_check_id) {
@@ -742,17 +742,6 @@
 
           __ raw_push(R4, R5, LR);
 
-          __ tlab_refill(result, obj_size, tmp1, tmp2, obj_end, try_eden, slow_case);
-
-          __ bind(retry_tlab);
-          __ ldr_u32(obj_size, Address(klass, Klass::layout_helper_offset()));
-          __ tlab_allocate(result, obj_end, tmp1, obj_size, slow_case);              // initializes result and obj_end
-          __ initialize_object(result, obj_end, klass, noreg /* len */, tmp1, tmp2,
-                               instanceOopDesc::header_size() * HeapWordSize, -1,
-                               /* is_tlab_allocated */ true);
-          __ raw_pop_and_ret(R4, R5);
-
-          __ bind(try_eden);
           __ ldr_u32(obj_size, Address(klass, Klass::layout_helper_offset()));
           __ eden_allocate(result, obj_end, tmp1, tmp2, obj_size, slow_case);        // initializes result and obj_end
           __ incr_allocated_bytes(obj_size, tmp2);
@@ -803,10 +792,10 @@
         const Register klass  = R1;
         const Register length = R2;
 
-        if (UseTLAB && FastTLABRefill) {
+        if (UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
           // We come here when TLAB allocation failed.
-          // In this case we either refill TLAB or allocate directly from eden.
-          Label retry_tlab, try_eden, slow_case, slow_case_no_pop;
+          // In this case we try to allocate directly from eden.
+          Label slow_case, slow_case_no_pop;
 
 #ifdef AARCH64
           __ mov_slow(Rtemp, C1_MacroAssembler::max_array_allocation_length);
@@ -825,40 +814,6 @@
 
           __ raw_push(R4, R5, LR);
 
-          __ tlab_refill(result, arr_size, tmp1, tmp2, tmp3, try_eden, slow_case);
-
-          __ bind(retry_tlab);
-          // Get the allocation size: round_up((length << (layout_helper & 0xff)) + header_size)
-          __ ldr_u32(tmp1, Address(klass, Klass::layout_helper_offset()));
-          __ mov(arr_size, MinObjAlignmentInBytesMask);
-          __ and_32(tmp2, tmp1, (unsigned int)(Klass::_lh_header_size_mask << Klass::_lh_header_size_shift));
-
-#ifdef AARCH64
-          __ lslv_w(tmp3, length, tmp1);
-          __ add(arr_size, arr_size, tmp3);
-#else
-          __ add(arr_size, arr_size, AsmOperand(length, lsl, tmp1));
-#endif // AARCH64
-
-          __ add(arr_size, arr_size, AsmOperand(tmp2, lsr, Klass::_lh_header_size_shift));
-          __ align_reg(arr_size, arr_size, MinObjAlignmentInBytes);
-
-          // tlab_allocate initializes result and obj_end, and preserves tmp2 which contains header_size
-          __ tlab_allocate(result, obj_end, tmp1, arr_size, slow_case);
-
-          assert_different_registers(result, obj_end, klass, length, tmp1, tmp2);
-          __ initialize_header(result, klass, length, tmp1);
-
-          __ add(tmp2, result, AsmOperand(tmp2, lsr, Klass::_lh_header_size_shift));
-          if (!ZeroTLAB) {
-            __ initialize_body(tmp2, obj_end, tmp1);
-          }
-
-          __ membar(MacroAssembler::StoreStore, tmp1);
-
-          __ raw_pop_and_ret(R4, R5);
-
-          __ bind(try_eden);
           // Get the allocation size: round_up((length << (layout_helper & 0xff)) + header_size)
           __ ldr_u32(tmp1, Address(klass, Klass::layout_helper_offset()));
           __ mov(arr_size, MinObjAlignmentInBytesMask);
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -1316,98 +1316,6 @@
   str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
 }
 
-void MacroAssembler::tlab_refill(Register top, Register tmp1, Register tmp2,
-                                 Register tmp3, Register tmp4,
-                               Label& try_eden, Label& slow_case) {
-  if (!Universe::heap()->supports_inline_contig_alloc()) {
-    b(slow_case);
-    return;
-  }
-
-  InlinedAddress intArrayKlass_addr((address)Universe::intArrayKlassObj_addr());
-  Label discard_tlab, do_refill;
-  ldr(top,  Address(Rthread, JavaThread::tlab_top_offset()));
-  ldr(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
-  ldr(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
-
-  // Calculate amount of free space
-  sub(tmp1, tmp1, top);
-  // Retain tlab and allocate in shared space
-  // if the amount of free space in tlab is too large to discard
-  cmp(tmp2, AsmOperand(tmp1, lsr, LogHeapWordSize));
-  b(discard_tlab, ge);
-
-  // Increment waste limit to prevent getting stuck on this slow path
-  mov_slow(tmp3, ThreadLocalAllocBuffer::refill_waste_limit_increment());
-  add(tmp2, tmp2, tmp3);
-  str(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
-  if (TLABStats) {
-    ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
-    add_32(tmp2, tmp2, 1);
-    str_32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
-  }
-  b(try_eden);
-  bind_literal(intArrayKlass_addr);
-
-  bind(discard_tlab);
-  if (TLABStats) {
-    ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
-    ldr_u32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
-    add_32(tmp2, tmp2, 1);
-    add_32(tmp3, tmp3, AsmOperand(tmp1, lsr, LogHeapWordSize));
-    str_32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
-    str_32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
-  }
-  // If tlab is currently allocated (top or end != null)
-  // then fill [top, end + alignment_reserve) with array object
-  cbz(top, do_refill);
-
-  // Set up the mark word
-  mov_slow(tmp2, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
-  str(tmp2, Address(top, oopDesc::mark_offset_in_bytes()));
-  // Set klass to intArrayKlass and the length to the remaining space
-  ldr_literal(tmp2, intArrayKlass_addr);
-  add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes() -
-      typeArrayOopDesc::header_size(T_INT) * HeapWordSize);
-  Register klass = tmp2;
-  ldr(klass, Address(tmp2));
-  logical_shift_right(tmp1, tmp1, LogBytesPerInt); // divide by sizeof(jint)
-  str_32(tmp1, Address(top, arrayOopDesc::length_offset_in_bytes()));
-  store_klass(klass, top); // blows klass:
-  klass = noreg;
-
-  ldr(tmp1, Address(Rthread, JavaThread::tlab_start_offset()));
-  sub(tmp1, top, tmp1); // size of tlab's allocated portion
-  incr_allocated_bytes(tmp1, tmp2);
-
-  bind(do_refill);
-  // Refill the tlab with an eden allocation
-  ldr(tmp1, Address(Rthread, JavaThread::tlab_size_offset()));
-  logical_shift_left(tmp4, tmp1, LogHeapWordSize);
-  eden_allocate(top, tmp1, tmp2, tmp3, tmp4, slow_case);
-  str(top, Address(Rthread, JavaThread::tlab_start_offset()));
-  str(top, Address(Rthread, JavaThread::tlab_top_offset()));
-
-#ifdef ASSERT
-  // Verify that tmp1 contains tlab_end
-  ldr(tmp2, Address(Rthread, JavaThread::tlab_size_offset()));
-  add(tmp2, top, AsmOperand(tmp2, lsl, LogHeapWordSize));
-  cmp(tmp1, tmp2);
-  breakpoint(ne);
-#endif
-
-  sub(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
-  str(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
-
-  if (ZeroTLAB) {
-    // clobbers start and tmp
-    // top must be preserved!
-    add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
-    ldr(tmp2, Address(Rthread, JavaThread::tlab_start_offset()));
-    zero_memory(tmp2, tmp1, tmp3);
-  }
-}
-
 // Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
   Label loop;
--- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp	Fri Feb 16 14:23:30 2018 -0800
@@ -359,8 +359,6 @@
   void tlab_allocate(Register obj, Register obj_end, Register tmp1,
                      RegisterOrConstant size_expression, Label& slow_case);
 
-  void tlab_refill(Register top, Register tmp1, Register tmp2, Register tmp3, Register tmp4,
-                   Label& try_eden, Label& slow_case);
   void zero_memory(Register start, Register end, Register tmp);
 
   void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp);
--- a/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -413,34 +413,9 @@
           assert(id == fast_new_instance_init_check_id, "bad StubID");
           __ set_info("fast new_instance init check", dont_gc_arguments);
         }
+
         // We don't support eden allocation.
-//        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
-//            UseTLAB && FastTLABRefill) {
-//          if (id == fast_new_instance_init_check_id) {
-//            // make sure the klass is initialized
-//            __ lbz(R0, in_bytes(InstanceKlass::init_state_offset()), R3_ARG1);
-//            __ cmpwi(CCR0, R0, InstanceKlass::fully_initialized);
-//            __ bne(CCR0, slow_path);
-//          }
-//#ifdef ASSERT
-//          // assert object can be fast path allocated
-//          {
-//            Label ok, not_ok;
-//          __ lwz(R0, in_bytes(Klass::layout_helper_offset()), R3_ARG1);
-//          // make sure it's an instance (LH > 0)
-//          __ cmpwi(CCR0, R0, 0);
-//          __ ble(CCR0, not_ok);
-//          __ testbitdi(CCR0, R0, R0, Klass::_lh_instance_slow_path_bit);
-//          __ beq(CCR0, ok);
-//
-//          __ bind(not_ok);
-//          __ stop("assert(can be fast path allocated)");
-//          __ bind(ok);
-//          }
-//#endif // ASSERT
-//          // We don't support eden allocation.
-//          __ bind(slow_path);
-//        }
+
         oop_maps = generate_stub_call(sasm, R3_RET, CAST_FROM_FN_PTR(address, new_instance), R4_ARG2);
       }
       break;
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -2336,9 +2336,6 @@
   std(new_top, in_bytes(JavaThread::tlab_top_offset()), R16_thread);
   //verify_tlab(); not implemented
 }
-void MacroAssembler::tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case) {
-  unimplemented("tlab_refill");
-}
 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2) {
   unimplemented("incr_allocated_bytes");
 }
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp	Fri Feb 16 14:23:30 2018 -0800
@@ -602,7 +602,6 @@
     Register t1,                       // temp register
     Label&   slow_case                 // continuation point if fast allocation fails
   );
-  void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
   void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2);
 
   enum { trampoline_stub_size = 6 * 4 };
--- a/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -346,11 +346,6 @@
           __ set_info("fast new_instance init check", dont_gc_arguments);
         }
 
-        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
-            UseTLAB && FastTLABRefill) {
-          // Sapjvm: must call RT to generate allocation events.
-        }
-
         OopMap* map = save_live_registers_except_r2(sasm);
         int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass);
         oop_maps = new OopMapSet();
@@ -411,10 +406,6 @@
         }
 #endif // ASSERT
 
-        if (UseTLAB && FastTLABRefill) {
-          // sapjvm: must call RT to generate allocation events.
-        }
-
         OopMap* map = save_live_registers_except_r2(sasm);
         int call_offset;
         if (id == new_type_array_id) {
--- a/src/hotspot/cpu/sparc/c1_Runtime1_sparc.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/sparc/c1_Runtime1_sparc.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -389,7 +389,7 @@
         }
 
         if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
-            UseTLAB && FastTLABRefill) {
+            UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
           Label slow_path;
           Register G1_obj_size = G1;
           Register G3_t1 = G3;
@@ -424,25 +424,8 @@
           __ bind(ok);
           }
 #endif // ASSERT
-          // if we got here then the TLAB allocation failed, so try
-          // refilling the TLAB or allocating directly from eden.
-          Label retry_tlab, try_eden;
-          __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves G5_klass
 
-          __ bind(retry_tlab);
-
-          // get the instance size
-          __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
-
-          __ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path);
-
-          __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2, /* is_tlab_allocated */ true);
-          __ verify_oop(O0_obj);
-          __ mov(O0, I0);
-          __ ret();
-          __ delayed()->restore();
-
-          __ bind(try_eden);
+          // If we got here then the TLAB allocation failed, so try allocating directly from eden.
           // get the instance size
           __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
           __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path);
@@ -508,73 +491,6 @@
         }
 #endif // ASSERT
 
-        if (UseTLAB && FastTLABRefill) {
-          Label slow_path;
-          Register G1_arr_size = G1;
-          Register G3_t1 = G3;
-          Register O1_t2 = O1;
-          assert_different_registers(G5_klass, G4_length, G1_arr_size, G3_t1, O1_t2);
-
-          // check that array length is small enough for fast path
-          __ set(C1_MacroAssembler::max_array_allocation_length, G3_t1);
-          __ cmp(G4_length, G3_t1);
-          __ br(Assembler::greaterUnsigned, false, Assembler::pn, slow_path);
-          __ delayed()->nop();
-
-          // if we got here then the TLAB allocation failed, so try
-          // refilling the TLAB or allocating directly from eden.
-          Label retry_tlab, try_eden;
-          __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves G4_length and G5_klass
-
-          __ bind(retry_tlab);
-
-          // get the allocation size: (length << (layout_helper & 0x1F)) + header_size
-          __ ld(klass_lh, G3_t1);
-          __ sll(G4_length, G3_t1, G1_arr_size);
-          __ srl(G3_t1, Klass::_lh_header_size_shift, G3_t1);
-          __ and3(G3_t1, Klass::_lh_header_size_mask, G3_t1);
-          __ add(G1_arr_size, G3_t1, G1_arr_size);
-          __ add(G1_arr_size, MinObjAlignmentInBytesMask, G1_arr_size);  // align up
-          __ and3(G1_arr_size, ~MinObjAlignmentInBytesMask, G1_arr_size);
-
-          __ tlab_allocate(O0_obj, G1_arr_size, 0, G3_t1, slow_path);  // preserves G1_arr_size
-
-          __ initialize_header(O0_obj, G5_klass, G4_length, G3_t1, O1_t2);
-          __ ldub(klass_lh, G3_t1, klass_lh_header_size_offset);
-          __ sub(G1_arr_size, G3_t1, O1_t2);  // body length
-          __ add(O0_obj, G3_t1, G3_t1);       // body start
-          if (!ZeroTLAB) {
-            __ initialize_body(G3_t1, O1_t2);
-          }
-          __ verify_oop(O0_obj);
-          __ retl();
-          __ delayed()->nop();
-
-          __ bind(try_eden);
-          // get the allocation size: (length << (layout_helper & 0x1F)) + header_size
-          __ ld(klass_lh, G3_t1);
-          __ sll(G4_length, G3_t1, G1_arr_size);
-          __ srl(G3_t1, Klass::_lh_header_size_shift, G3_t1);
-          __ and3(G3_t1, Klass::_lh_header_size_mask, G3_t1);
-          __ add(G1_arr_size, G3_t1, G1_arr_size);
-          __ add(G1_arr_size, MinObjAlignmentInBytesMask, G1_arr_size);
-          __ and3(G1_arr_size, ~MinObjAlignmentInBytesMask, G1_arr_size);
-
-          __ eden_allocate(O0_obj, G1_arr_size, 0, G3_t1, O1_t2, slow_path);  // preserves G1_arr_size
-          __ incr_allocated_bytes(G1_arr_size, G3_t1, O1_t2);
-
-          __ initialize_header(O0_obj, G5_klass, G4_length, G3_t1, O1_t2);
-          __ ldub(klass_lh, G3_t1, klass_lh_header_size_offset);
-          __ sub(G1_arr_size, G3_t1, O1_t2);  // body length
-          __ add(O0_obj, G3_t1, G3_t1);       // body start
-          __ initialize_body(G3_t1, O1_t2);
-          __ verify_oop(O0_obj);
-          __ retl();
-          __ delayed()->nop();
-
-          __ bind(slow_path);
-        }
-
         if (id == new_type_array_id) {
           oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_type_array), G5_klass, G4_length);
         } else {
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -3242,127 +3242,6 @@
   verify_tlab();
 }
 
-
-void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
-  Register top = O0;
-  Register t1 = G1;
-  Register t2 = G3;
-  Register t3 = O1;
-  assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */);
-  Label do_refill, discard_tlab;
-
-  if (!Universe::heap()->supports_inline_contig_alloc()) {
-    // No allocation in the shared eden.
-    ba(slow_case);
-    delayed()->nop();
-  }
-
-  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top);
-  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1);
-  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2);
-
-  // calculate amount of free space
-  sub(t1, top, t1);
-  srl_ptr(t1, LogHeapWordSize, t1);
-
-  // Retain tlab and allocate object in shared space if
-  // the amount free in the tlab is too large to discard.
-  cmp(t1, t2);
-
-  brx(Assembler::lessEqual, false, Assembler::pt, discard_tlab);
-  // increment waste limit to prevent getting stuck on this slow path
-  if (Assembler::is_simm13(ThreadLocalAllocBuffer::refill_waste_limit_increment())) {
-    delayed()->add(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment(), t2);
-  } else {
-    delayed()->nop();
-    // set64 does not use the temp register if the given constant is 32 bit. So
-    // we can just use any register; using G0 results in ignoring of the upper 32 bit
-    // of that value.
-    set64(ThreadLocalAllocBuffer::refill_waste_limit_increment(), t3, G0);
-    add(t2, t3, t2);
-  }
-
-  st_ptr(t2, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
-  if (TLABStats) {
-    // increment number of slow_allocations
-    ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2);
-    add(t2, 1, t2);
-    stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()));
-  }
-  ba(try_eden);
-  delayed()->nop();
-
-  bind(discard_tlab);
-  if (TLABStats) {
-    // increment number of refills
-    ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2);
-    add(t2, 1, t2);
-    stw(t2, G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()));
-    // accumulate wastage
-    ld(G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()), t2);
-    add(t2, t1, t2);
-    stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
-  }
-
-  // if tlab is currently allocated (top or end != null) then
-  // fill [top, end + alignment_reserve) with array object
-  br_null_short(top, Assembler::pn, do_refill);
-
-  set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
-  st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
-  // set klass to intArrayKlass
-  sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
-  add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
-  sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
-  st(t1, top, arrayOopDesc::length_offset_in_bytes());
-  set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
-  ld_ptr(t2, 0, t2);
-  // store klass last.  concurrent gcs assumes klass length is valid if
-  // klass field is not null.
-  store_klass(t2, top);
-  verify_oop(top);
-
-  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t1);
-  sub(top, t1, t1); // size of tlab's allocated portion
-  incr_allocated_bytes(t1, t2, t3);
-
-  // refill the tlab with an eden allocation
-  bind(do_refill);
-  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1);
-  sll_ptr(t1, LogHeapWordSize, t1);
-  // allocate new tlab, address returned in top
-  eden_allocate(top, t1, 0, t2, t3, slow_case);
-
-  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_start_offset()));
-  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
-#ifdef ASSERT
-  // check that tlab_size (t1) is still valid
-  {
-    Label ok;
-    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
-    sll_ptr(t2, LogHeapWordSize, t2);
-    cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok);
-    STOP("assert(t1 == tlab_size)");
-    should_not_reach_here();
-
-    bind(ok);
-  }
-#endif // ASSERT
-  add(top, t1, top); // t1 is tlab_size
-  sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
-  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
-
-  if (ZeroTLAB) {
-    // This is a fast TLAB refill, therefore the GC is not notified of it.
-    // So compiled code must fill the new TLAB with zeroes.
-    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
-    zero_memory(t2, t1);
-  }
-  verify_tlab();
-  ba(retry);
-  delayed()->nop();
-}
-
 void MacroAssembler::zero_memory(Register base, Register index) {
   assert_different_registers(base, index);
   Label loop;
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp	Fri Feb 16 14:23:30 2018 -0800
@@ -1266,7 +1266,6 @@
     Register t1,                       // temp register
     Label&   slow_case                 // continuation point if fast allocation fails
   );
-  void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
   void zero_memory(Register base, Register index);
   void incr_allocated_bytes(RegisterOrConstant size_in_bytes,
                             Register t1, Register t2);
--- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -994,8 +994,8 @@
           __ set_info("fast new_instance init check", dont_gc_arguments);
         }
 
-        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
-            UseTLAB && FastTLABRefill) {
+        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && UseTLAB
+            && Universe::heap()->supports_inline_contig_alloc()) {
           Label slow_path;
           Register obj_size = rcx;
           Register t1       = rbx;
@@ -1030,21 +1030,8 @@
           // if we got here then the TLAB allocation failed, so try
           // refilling the TLAB or allocating directly from eden.
           Label retry_tlab, try_eden;
-          const Register thread =
-            __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy rdx (klass), returns rdi
-
-          __ bind(retry_tlab);
-
-          // get the instance size (size is postive so movl is fine for 64bit)
-          __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
-
-          __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
-
-          __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
-          __ verify_oop(obj);
-          __ pop(rbx);
-          __ pop(rdi);
-          __ ret(0);
+          const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread);
+          NOT_LP64(__ get_thread(thread));
 
           __ bind(try_eden);
           // get the instance size (size is postive so movl is fine for 64bit)
@@ -1128,24 +1115,13 @@
         }
 #endif // ASSERT
 
-        if (UseTLAB && FastTLABRefill) {
+        // If we got here, the TLAB allocation failed, so try allocating from
+        // eden if inline contiguous allocations are supported.
+        if (UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
           Register arr_size = rsi;
           Register t1       = rcx;  // must be rcx for use as shift count
           Register t2       = rdi;
           Label slow_path;
-          assert_different_registers(length, klass, obj, arr_size, t1, t2);
-
-          // check that array length is small enough for fast path.
-          __ cmpl(length, C1_MacroAssembler::max_array_allocation_length);
-          __ jcc(Assembler::above, slow_path);
-
-          // if we got here then the TLAB allocation failed, so try
-          // refilling the TLAB or allocating directly from eden.
-          Label retry_tlab, try_eden;
-          const Register thread =
-            __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves rbx & rdx, returns rdi
-
-          __ bind(retry_tlab);
 
           // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
           // since size is positive movl does right thing on 64bit
@@ -1160,36 +1136,11 @@
           __ addptr(arr_size, MinObjAlignmentInBytesMask); // align up
           __ andptr(arr_size, ~MinObjAlignmentInBytesMask);
 
-          __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path);  // preserves arr_size
-
-          __ initialize_header(obj, klass, length, t1, t2);
-          __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
-          assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
-          assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
-          __ andptr(t1, Klass::_lh_header_size_mask);
-          __ subptr(arr_size, t1);  // body length
-          __ addptr(t1, obj);       // body start
-          if (!ZeroTLAB) {
-            __ initialize_body(t1, arr_size, 0, t2);
-          }
-          __ verify_oop(obj);
-          __ ret(0);
+          __ eden_allocate(obj, arr_size, 0, t1, slow_path);  // preserves arr_size
 
-          __ bind(try_eden);
-          // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
-          // since size is positive movl does right thing on 64bit
-          __ movl(t1, Address(klass, Klass::layout_helper_offset()));
-          // since size is postive movl does right thing on 64bit
-          __ movl(arr_size, length);
-          assert(t1 == rcx, "fixed register usage");
-          __ shlptr(arr_size /* by t1=rcx, mod 32 */);
-          __ shrptr(t1, Klass::_lh_header_size_shift);
-          __ andptr(t1, Klass::_lh_header_size_mask);
-          __ addptr(arr_size, t1);
-          __ addptr(arr_size, MinObjAlignmentInBytesMask); // align up
-          __ andptr(arr_size, ~MinObjAlignmentInBytesMask);
-
-          __ eden_allocate(obj, arr_size, 0, t1, slow_path);  // preserves arr_size
+          // Non 64-bit: fetch the current thread into t2 (64-bit uses r15_thread).
+          const Register thread = NOT_LP64(t2) LP64_ONLY(r15_thread);
+          NOT_LP64(__ get_thread(thread));
           __ incr_allocated_bytes(thread, arr_size, 0);
 
           __ initialize_header(obj, klass, length, t1, t2);
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -5604,121 +5604,6 @@
   verify_tlab();
 }
 
-// Preserves rbx, and rdx.
-Register MacroAssembler::tlab_refill(Label& retry,
-                                     Label& try_eden,
-                                     Label& slow_case) {
-  Register top = rax;
-  Register t1  = rcx; // object size
-  Register t2  = rsi;
-  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
-  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
-  Label do_refill, discard_tlab;
-
-  if (!Universe::heap()->supports_inline_contig_alloc()) {
-    // No allocation in the shared eden.
-    jmp(slow_case);
-  }
-
-  NOT_LP64(get_thread(thread_reg));
-
-  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
-  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
-
-  // calculate amount of free space
-  subptr(t1, top);
-  shrptr(t1, LogHeapWordSize);
-
-  // Retain tlab and allocate object in shared space if
-  // the amount free in the tlab is too large to discard.
-  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
-  jcc(Assembler::lessEqual, discard_tlab);
-
-  // Retain
-  // %%% yuck as movptr...
-  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
-  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
-  if (TLABStats) {
-    // increment number of slow_allocations
-    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
-  }
-  jmp(try_eden);
-
-  bind(discard_tlab);
-  if (TLABStats) {
-    // increment number of refills
-    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
-    // accumulate wastage -- t1 is amount free in tlab
-    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
-  }
-
-  // if tlab is currently allocated (top or end != null) then
-  // fill [top, end + alignment_reserve) with array object
-  testptr(top, top);
-  jcc(Assembler::zero, do_refill);
-
-  // set up the mark word
-  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
-  // set the length to the remaining space
-  subptr(t1, typeArrayOopDesc::header_size(T_INT));
-  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
-  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
-  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
-  // set klass to intArrayKlass
-  // dubious reloc why not an oop reloc?
-  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
-  // store klass last.  concurrent gcs assumes klass length is valid if
-  // klass field is not null.
-  store_klass(top, t1);
-
-  movptr(t1, top);
-  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
-  incr_allocated_bytes(thread_reg, t1, 0);
-
-  // refill the tlab with an eden allocation
-  bind(do_refill);
-  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
-  shlptr(t1, LogHeapWordSize);
-  // allocate new tlab, address returned in top
-  eden_allocate(top, t1, 0, t2, slow_case);
-
-  // Check that t1 was preserved in eden_allocate.
-#ifdef ASSERT
-  if (UseTLAB) {
-    Label ok;
-    Register tsize = rsi;
-    assert_different_registers(tsize, thread_reg, t1);
-    push(tsize);
-    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
-    shlptr(tsize, LogHeapWordSize);
-    cmpptr(t1, tsize);
-    jcc(Assembler::equal, ok);
-    STOP("assert(t1 != tlab size)");
-    should_not_reach_here();
-
-    bind(ok);
-    pop(tsize);
-  }
-#endif
-  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
-  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
-  addptr(top, t1);
-  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
-  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
-
-  if (ZeroTLAB) {
-    // This is a fast TLAB refill, therefore the GC is not notified of it.
-    // So compiled code must fill the new TLAB with zeroes.
-    movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
-    zero_memory(top, t1, 0, t2);
-  }
-
-  verify_tlab();
-  jmp(retry);
-
-  return thread_reg; // for use by caller
-}
-
 // Preserves the contents of address, destroys the contents length_in_bytes and temp.
 void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) {
   assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different");
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Fri Feb 16 14:23:30 2018 -0800
@@ -531,7 +531,6 @@
     Register t2,                       // temp register
     Label&   slow_case                 // continuation point if fast allocation fails
   );
-  Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
   void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp);
 
   void incr_allocated_bytes(Register thread,
--- a/src/hotspot/share/gc/g1/g1Arguments.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/share/gc/g1/g1Arguments.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -40,9 +40,6 @@
 void G1Arguments::initialize_flags() {
   GCArguments::initialize_flags();
   assert(UseG1GC, "Error");
-#if defined(COMPILER1) || INCLUDE_JVMCI
-  FastTLABRefill = false;
-#endif
   FLAG_SET_DEFAULT(ParallelGCThreads, Abstract_VM_Version::parallel_worker_threads());
   if (ParallelGCThreads == 0) {
     assert(!FLAG_IS_DEFAULT(ParallelGCThreads), "The default value for ParallelGCThreads should not be 0.");
--- a/src/hotspot/share/runtime/arguments.cpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/share/runtime/arguments.cpp	Fri Feb 16 14:23:30 2018 -0800
@@ -509,7 +509,6 @@
   { "MinRAMFraction",               JDK_Version::jdk(10),  JDK_Version::undefined(), JDK_Version::undefined() },
   { "InitialRAMFraction",           JDK_Version::jdk(10),  JDK_Version::undefined(), JDK_Version::undefined() },
   { "UseMembar",                    JDK_Version::jdk(10), JDK_Version::undefined(), JDK_Version::undefined() },
-  { "FastTLABRefill",               JDK_Version::jdk(10), JDK_Version::jdk(11), JDK_Version::jdk(12) },
   { "SafepointSpinBeforeYield",     JDK_Version::jdk(10), JDK_Version::jdk(11), JDK_Version::jdk(12) },
   { "DeferThrSuspendLoopCount",     JDK_Version::jdk(10), JDK_Version::jdk(11), JDK_Version::jdk(12) },
   { "DeferPollingPageLoopCount",    JDK_Version::jdk(10), JDK_Version::jdk(11), JDK_Version::jdk(12) },
@@ -531,6 +530,7 @@
   { "CheckAssertionStatusDirectives",JDK_Version::undefined(), JDK_Version::jdk(11), JDK_Version::jdk(12) },
   { "PrintMallocFree",               JDK_Version::undefined(), JDK_Version::jdk(11), JDK_Version::jdk(12) },
   { "PrintMalloc",                   JDK_Version::undefined(), JDK_Version::jdk(11), JDK_Version::jdk(12) },
+  { "FastTLABRefill",                JDK_Version::jdk(10),     JDK_Version::jdk(11), JDK_Version::jdk(12) },
   { "PermSize",                      JDK_Version::undefined(), JDK_Version::jdk(8),  JDK_Version::undefined() },
   { "MaxPermSize",                   JDK_Version::undefined(), JDK_Version::jdk(8),  JDK_Version::undefined() },
   { "SharedReadWriteSize",           JDK_Version::undefined(), JDK_Version::jdk(10), JDK_Version::undefined() },
--- a/src/hotspot/share/runtime/globals.hpp	Fri Feb 16 11:22:53 2018 -0800
+++ b/src/hotspot/share/runtime/globals.hpp	Fri Feb 16 14:23:30 2018 -0800
@@ -2012,9 +2012,6 @@
   product(bool, ZeroTLAB, false,                                            \
           "Zero out the newly created TLAB")                                \
                                                                             \
-  product(bool, FastTLABRefill, false,                                      \
-          "(Deprecated) Use fast TLAB refill code")                         \
-                                                                            \
   product(bool, TLABStats, true,                                            \
           "Provide more detailed and expensive TLAB statistics.")           \
                                                                             \