8195112: x86 (32 bit): implementation for Thread-local handshakes
author mdoerr
Tue, 20 Feb 2018 16:10:21 +0100
changeset 49027 8dc742d9bbab
parent 49026 844bf1deff1a
child 49028 c7ba033fc07a
8195112: x86 (32 bit): implementation for Thread-local handshakes
Reviewed-by: goetz, rehn
src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
src/hotspot/cpu/x86/globalDefinitions_x86.hpp
src/hotspot/cpu/x86/globals_x86.hpp
src/hotspot/cpu/x86/interp_masm_x86.cpp
src/hotspot/cpu/x86/macroAssembler_x86.cpp
src/hotspot/cpu/x86/macroAssembler_x86.hpp
src/hotspot/cpu/x86/nativeInst_x86.hpp
src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp
src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp
src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp
src/hotspot/cpu/x86/templateTable_x86.cpp
src/hotspot/cpu/x86/x86_32.ad
src/hotspot/share/runtime/arguments.cpp
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp	Tue Feb 20 16:10:21 2018 +0100
@@ -529,12 +529,16 @@
 
   if (SafepointMechanism::uses_thread_local_poll()) {
 #ifdef _LP64
-    __ movptr(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
+    const Register poll_addr = rscratch1;
+    __ movptr(poll_addr, Address(r15_thread, Thread::polling_page_offset()));
+#else
+    const Register poll_addr = rbx;
+    assert(FrameMap::is_caller_save_register(poll_addr), "will overwrite");
+    __ get_thread(poll_addr);
+    __ movptr(poll_addr, Address(poll_addr, Thread::polling_page_offset()));
+#endif
     __ relocate(relocInfo::poll_return_type);
-    __ testl(rax, Address(rscratch1, 0));
-#else
-    ShouldNotReachHere();
-#endif
+    __ testl(rax, Address(poll_addr, 0));
   } else {
     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
 
@@ -555,16 +559,20 @@
   int offset = __ offset();
   if (SafepointMechanism::uses_thread_local_poll()) {
 #ifdef _LP64
-    __ movptr(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
+    const Register poll_addr = rscratch1;
+    __ movptr(poll_addr, Address(r15_thread, Thread::polling_page_offset()));
+#else
+    assert(tmp->is_cpu_register(), "needed");
+    const Register poll_addr = tmp->as_register();
+    __ get_thread(poll_addr);
+    __ movptr(poll_addr, Address(poll_addr, in_bytes(Thread::polling_page_offset())));
+#endif
     add_debug_info_for_branch(info);
     __ relocate(relocInfo::poll_type);
     address pre_pc = __ pc();
-    __ testl(rax, Address(rscratch1, 0));
+    __ testl(rax, Address(poll_addr, 0));
     address post_pc = __ pc();
-    guarantee(pointer_delta(post_pc, pre_pc, 1) == 3, "must be exact length");
-#else
-    ShouldNotReachHere();
-#endif
+    guarantee(pointer_delta(post_pc, pre_pc, 1) == 2 LP64_ONLY(+1), "must be exact length");
   } else {
     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);
     if (Assembler::is_polling_page_far()) {
--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp	Tue Feb 20 16:10:21 2018 +0100
@@ -143,6 +143,7 @@
 
 
 LIR_Opr LIRGenerator::safepoint_poll_register() {
+  NOT_LP64( if (SafepointMechanism::uses_thread_local_poll()) { return new_register(T_ADDRESS); } )
   return LIR_OprFact::illegalOpr;
 }
 
@@ -1515,7 +1516,7 @@
   if (x->is_safepoint()) {
     // increment backedge counter if needed
     increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci());
-    __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before()));
+    __ safepoint(safepoint_poll_register(), state_for(x, x->state_before()));
   }
   set_no_result(x);
 
--- a/src/hotspot/cpu/x86/globalDefinitions_x86.hpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/globalDefinitions_x86.hpp	Tue Feb 20 16:10:21 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -65,9 +65,6 @@
 #define SUPPORT_RESERVED_STACK_AREA
 #endif
 
-#ifdef _LP64
-// X64 have implemented the local polling
 #define THREAD_LOCAL_POLL
-#endif
 
 #endif // CPU_X86_VM_GLOBALDEFINITIONS_X86_HPP
--- a/src/hotspot/cpu/x86/globals_x86.hpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/globals_x86.hpp	Tue Feb 20 16:10:21 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -97,9 +97,10 @@
 
 define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
 
-#ifdef _LP64
+#if defined(_LP64) || defined(_WINDOWS)
 define_pd_global(bool, ThreadLocalHandshakes, true);
 #else
+// get_thread() is slow on linux 32 bit, therefore off by default
 define_pd_global(bool, ThreadLocalHandshakes, false);
 #endif
 
--- a/src/hotspot/cpu/x86/interp_masm_x86.cpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp	Tue Feb 20 16:10:21 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -830,13 +830,12 @@
   if (verifyoop) {
     verify_oop(rax, state);
   }
+
+  address* const safepoint_table = Interpreter::safept_table(state);
 #ifdef _LP64
-
   Label no_safepoint, dispatch;
-  address* const safepoint_table = Interpreter::safept_table(state);
   if (SafepointMechanism::uses_thread_local_poll() && table != safepoint_table && generate_poll) {
     NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
-
     testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
 
     jccb(Assembler::zero, no_safepoint);
@@ -851,9 +850,23 @@
 
 #else
   Address index(noreg, rbx, Address::times_ptr);
-  ExternalAddress tbl((address)table);
-  ArrayAddress dispatch(tbl, index);
-  jump(dispatch);
+  if (SafepointMechanism::uses_thread_local_poll() && table != safepoint_table && generate_poll) {
+    NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
+    Label no_safepoint;
+    const Register thread = rcx;
+    get_thread(thread);
+    testb(Address(thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+
+    jccb(Assembler::zero, no_safepoint);
+    ArrayAddress dispatch_addr(ExternalAddress((address)safepoint_table), index);
+    jump(dispatch_addr);
+    bind(no_safepoint);
+  }
+
+  {
+    ArrayAddress dispatch_addr(ExternalAddress((address)table), index);
+    jump(dispatch_addr);
+  }
 #endif // _LP64
 }
 
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Tue Feb 20 16:10:21 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3767,10 +3767,17 @@
   movl(as_Address(ArrayAddress(page, index)), tmp);
 }
 
-#ifdef _LP64
 void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg) {
   if (SafepointMechanism::uses_thread_local_poll()) {
-    testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+#ifdef _LP64
+    assert(thread_reg == r15_thread, "should be");
+#else
+    if (thread_reg == noreg) {
+      thread_reg = temp_reg;
+      get_thread(thread_reg);
+    }
+#endif
+    testb(Address(thread_reg, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
     jcc(Assembler::notZero, slow_path); // handshake bit set implies poll
   } else {
     cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
@@ -3778,13 +3785,6 @@
     jcc(Assembler::notEqual, slow_path);
   }
 }
-#else
-void MacroAssembler::safepoint_poll(Label& slow_path) {
-  cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
-      SafepointSynchronize::_not_synchronized);
-  jcc(Assembler::notEqual, slow_path);
-}
-#endif
 
 // Calls to C land
 //
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Tue Feb 20 16:10:21 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -656,11 +656,9 @@
   // Support for serializing memory accesses between threads
   void serialize_memory(Register thread, Register tmp);
 
-#ifdef _LP64
+  // If thread_reg is != noreg the code assumes the register passed contains
+  // the thread (required on 64 bit).
   void safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg);
-#else
-  void safepoint_poll(Label& slow_path);
-#endif
 
   void verify_tlab();
 
--- a/src/hotspot/cpu/x86/nativeInst_x86.hpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/nativeInst_x86.hpp	Tue Feb 20 16:10:21 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -704,14 +704,18 @@
 inline bool NativeInstruction::is_cond_jump()    { return (int_at(0) & 0xF0FF) == 0x800F /* long jump */ ||
                                                           (ubyte_at(0) & 0xF0) == 0x70;  /* short jump */ }
 inline bool NativeInstruction::is_safepoint_poll() {
+  if (SafepointMechanism::uses_thread_local_poll()) {
 #ifdef AMD64
-  if (SafepointMechanism::uses_thread_local_poll()) {
     const bool has_rex_prefix = ubyte_at(0) == NativeTstRegMem::instruction_rex_b_prefix;
     const int test_offset = has_rex_prefix ? 1 : 0;
+#else
+    const int test_offset = 0;
+#endif
     const bool is_test_opcode = ubyte_at(test_offset) == NativeTstRegMem::instruction_code_memXregl;
     const bool is_rax_target = (ubyte_at(test_offset + 1) & NativeTstRegMem::modrm_mask) == NativeTstRegMem::modrm_reg;
     return is_test_opcode && is_rax_target;
   }
+#ifdef AMD64
   // Try decoding a near safepoint first:
   if (ubyte_at(0) == NativeTstRegMem::instruction_code_memXregl &&
       ubyte_at(1) == 0x05) { // 00 rax 101
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp	Tue Feb 20 16:10:21 2018 +0100
@@ -2111,16 +2111,13 @@
   Label after_transition;
 
   // check for safepoint operation in progress and/or pending suspend requests
-  { Label Continue;
-
-    __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
-             SafepointSynchronize::_not_synchronized);
-
-    Label L;
-    __ jcc(Assembler::notEqual, L);
+  { Label Continue, slow_path;
+
+    __ safepoint_poll(slow_path, thread, noreg);
+
     __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
     __ jcc(Assembler::equal, Continue);
-    __ bind(L);
+    __ bind(slow_path);
 
     // Don't use call_VM as it will see a possible pending exception and forward it
     // and never return here preventing us from clearing _last_native_pc down below.
@@ -2996,8 +2993,11 @@
 
   // if this was not a poll_return then we need to correct the return address now.
   if (!cause_return) {
-    __ movptr(rax, Address(java_thread, JavaThread::saved_exception_pc_offset()));
-    __ movptr(Address(rbp, wordSize), rax);
+    // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack.
+    // Additionally, rbx is a callee saved register and we can look at it later to determine
+    // if someone changed the return address for us!
+    __ movptr(rbx, Address(java_thread, JavaThread::saved_exception_pc_offset()));
+    __ movptr(Address(rbp, wordSize), rbx);
   }
 
   // do the call
@@ -3029,11 +3029,63 @@
 
   __ bind(noException);
 
+  Label no_adjust, bail, not_special;
+  if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
+    // If our stashed return pc was modified by the runtime we avoid touching it
+    __ cmpptr(rbx, Address(rbp, wordSize));
+    __ jccb(Assembler::notEqual, no_adjust);
+
+    // Skip over the poll instruction.
+    // See NativeInstruction::is_safepoint_poll()
+    // Possible encodings:
+    //      85 00       test   %eax,(%rax)
+    //      85 01       test   %eax,(%rcx)
+    //      85 02       test   %eax,(%rdx)
+    //      85 03       test   %eax,(%rbx)
+    //      85 06       test   %eax,(%rsi)
+    //      85 07       test   %eax,(%rdi)
+    //
+    //      85 04 24    test   %eax,(%rsp)
+    //      85 45 00    test   %eax,0x0(%rbp)
+
+#ifdef ASSERT
+    __ movptr(rax, rbx); // remember where 0x85 should be, for verification below
+#endif
+    // rsp/rbp base encoding takes 3 bytes with the following register values:
+    // rsp 0x04
+    // rbp 0x05
+    __ movzbl(rcx, Address(rbx, 1));
+    __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05
+    __ subptr(rcx, 4);    // looking for 0x00 .. 0x01
+    __ cmpptr(rcx, 1);
+    __ jcc(Assembler::above, not_special);
+    __ addptr(rbx, 1);
+    __ bind(not_special);
+#ifdef ASSERT
+    // Verify the correct encoding of the poll we're about to skip.
+    __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl);
+    __ jcc(Assembler::notEqual, bail);
+    // Mask out the modrm bits
+    __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask);
+    // rax encodes to 0, so if the bits are nonzero it's incorrect
+    __ jcc(Assembler::notZero, bail);
+#endif
+    // Adjust return pc forward to step over the safepoint poll instruction
+    __ addptr(rbx, 2);
+    __ movptr(Address(rbp, wordSize), rbx);
+  }
+
+  __ bind(no_adjust);
   // Normal exit, register restoring and exit
   RegisterSaver::restore_live_registers(masm, save_vectors);
 
   __ ret(0);
 
+#ifdef ASSERT
+  __ bind(bail);
+  __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
+#endif
+
   // make sure all code is generated
   masm->flush();
 
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp	Tue Feb 20 16:10:21 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1148,7 +1148,7 @@
     Label slow_path;
 
 #ifndef _LP64
-    __ safepoint_poll(slow_path);
+    __ safepoint_poll(slow_path, thread, noreg);
 #else
     __ safepoint_poll(slow_path, r15_thread, rscratch1);
 #endif
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp	Tue Feb 20 16:10:21 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,10 +61,7 @@
 
     Label slow_path;
     // If we need a safepoint check, generate full interpreter entry.
-    ExternalAddress state(SafepointSynchronize::address_of_state());
-    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
-             SafepointSynchronize::_not_synchronized);
-    __ jcc(Assembler::notEqual, slow_path);
+    __ safepoint_poll(slow_path, noreg, rdi);
 
     // We don't generate local frame and don't align stack because
     // we call stub code and there is no safepoint on this path.
@@ -113,10 +110,7 @@
 
     Label slow_path;
     // If we need a safepoint check, generate full interpreter entry.
-    ExternalAddress state(SafepointSynchronize::address_of_state());
-    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
-             SafepointSynchronize::_not_synchronized);
-    __ jcc(Assembler::notEqual, slow_path);
+    __ safepoint_poll(slow_path, noreg, rdi);
 
     // We don't generate local frame and don't align stack because
     // we call stub code and there is no safepoint on this path.
--- a/src/hotspot/cpu/x86/templateTable_x86.cpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/templateTable_x86.cpp	Tue Feb 20 16:10:21 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2692,11 +2692,16 @@
     __ bind(skip_register_finalizer);
   }
 
-#ifdef _LP64
   if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) {
     Label no_safepoint;
     NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll"));
+#ifdef _LP64
     __ testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+#else
+    const Register thread = rdi;
+    __ get_thread(thread);
+    __ testb(Address(thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+#endif
     __ jcc(Assembler::zero, no_safepoint);
     __ push(state);
     __ call_VM(noreg, CAST_FROM_FN_PTR(address,
@@ -2704,7 +2709,6 @@
     __ pop(state);
     __ bind(no_safepoint);
   }
-#endif
 
   // Narrow result if state is itos but result type is smaller.
   // Need to narrow in the return bytecode rather than in generate_return_entry
--- a/src/hotspot/cpu/x86/x86_32.ad	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/cpu/x86/x86_32.ad	Tue Feb 20 16:10:21 2018 +0100
@@ -317,7 +317,7 @@
 // Indicate if the safepoint node needs the polling page as an input.
 // Since x86 does have absolute addressing, it doesn't.
 bool SafePointNode::needs_polling_address_input() {
-  return false;
+  return SafepointMechanism::uses_thread_local_poll();
 }
 
 //
@@ -706,34 +706,25 @@
   }
 
   if (do_polling() && C->is_method_compilation()) {
-    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
-    emit_opcode(cbuf,0x85);
-    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
-    emit_d32(cbuf, (intptr_t)os::get_polling_page());
+    if (SafepointMechanism::uses_thread_local_poll()) {
+      Register pollReg = as_Register(EBX_enc);
+      MacroAssembler masm(&cbuf);
+      masm.get_thread(pollReg);
+      masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
+      masm.relocate(relocInfo::poll_return_type);
+      masm.testl(rax, Address(pollReg, 0));
+    } else {
+      cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
+      emit_opcode(cbuf,0x85);
+      emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
+      emit_d32(cbuf, (intptr_t)os::get_polling_page());
+    }
   }
 }
 
 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
-  Compile *C = ra_->C;
-  // If method set FPU control word, restore to standard control word
-  int size = C->in_24_bit_fp_mode() ? 6 : 0;
-  if (C->max_vector_size() > 16) size += 3; // vzeroupper
-  if (do_polling() && C->is_method_compilation()) size += 6;
-
-  int framesize = C->frame_size_in_bytes();
-  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
-  // Remove two words for return addr and rbp,
-  framesize -= 2*wordSize;
-
-  size++; // popl rbp,
-
-  if (framesize >= 128) {
-    size += 6;
-  } else {
-    size += framesize ? 3 : 0;
-  }
-  size += 64; // added to support ReservedStackAccess
-  return size;
+  return MachNode::size(ra_); // too many variables; just compute it
+                              // the hard way
 }
 
 int MachEpilogNode::reloc() const {
@@ -13336,6 +13327,7 @@
 // ============================================================================
 // Safepoint Instruction
 instruct safePoint_poll(eFlagsReg cr) %{
+  predicate(SafepointMechanism::uses_global_page_poll());
   match(SafePoint);
   effect(KILL cr);
 
@@ -13354,6 +13346,25 @@
   ins_pipe( ialu_reg_mem );
 %}
 
+instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
+  predicate(SafepointMechanism::uses_thread_local_poll());
+  match(SafePoint poll);
+  effect(KILL cr, USE poll);
+
+  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
+  ins_cost(125);
+  // EBP would need size(3)
+  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
+  ins_encode %{
+    __ relocate(relocInfo::poll_type);
+    address pre_pc = __ pc();
+    __ testl(rax, Address($poll$$Register, 0));
+    address post_pc = __ pc();
+    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
 
 // ============================================================================
 // This name is KNOWN by the ADLC and cannot be changed.
--- a/src/hotspot/share/runtime/arguments.cpp	Tue Feb 20 07:46:40 2018 -0500
+++ b/src/hotspot/share/runtime/arguments.cpp	Tue Feb 20 16:10:21 2018 +0100
@@ -1826,6 +1826,13 @@
   }
 #endif
 
+#if defined(IA32)
+  // Only server compiler can optimize safepoints well enough.
+  if (!is_server_compilation_mode_vm()) {
+    FLAG_SET_ERGO_IF_DEFAULT(bool, ThreadLocalHandshakes, false);
+  }
+#endif
+
   set_conservative_max_heap_alignment();
 
 #ifndef ZERO