8189941: Implementation JEP 312: Thread-local handshake
author rehn
Thu, 31 Aug 2017 10:00:28 +0200
changeset 47881 0ce0ac68ace7
parent 47824 cf127be65014
child 47882 a93ce8f7bddb
8189941: Implementation JEP 312: Thread-local handshake Summary: Introduce a way to execute a callback on threads without performing a global VM safepoint. Make it both possible and cheap to stop individual threads and not just all threads or none. Reviewed-by: mdoerr, neliasso, acorn, aph, coleenp, dholmes Contributed-by: mikael.gerdin@oracle.com, erik.osterlund@oracle.com, robbin.ehn@oracle.com
make/test/JtregNativeHotspot.gmk
src/hotspot/cpu/aarch64/globals_aarch64.hpp
src/hotspot/cpu/arm/globals_arm.hpp
src/hotspot/cpu/ppc/globals_ppc.hpp
src/hotspot/cpu/s390/globals_s390.hpp
src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp
src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp
src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp
src/hotspot/cpu/sparc/globals_sparc.hpp
src/hotspot/cpu/sparc/interp_masm_sparc.cpp
src/hotspot/cpu/sparc/interp_masm_sparc.hpp
src/hotspot/cpu/sparc/macroAssembler_sparc.cpp
src/hotspot/cpu/sparc/macroAssembler_sparc.hpp
src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp
src/hotspot/cpu/sparc/sparc.ad
src/hotspot/cpu/sparc/templateInterpreterGenerator_sparc.cpp
src/hotspot/cpu/sparc/templateTable_sparc.cpp
src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
src/hotspot/cpu/x86/globalDefinitions_x86.hpp
src/hotspot/cpu/x86/globals_x86.hpp
src/hotspot/cpu/x86/interp_masm_x86.cpp
src/hotspot/cpu/x86/interp_masm_x86.hpp
src/hotspot/cpu/x86/macroAssembler_x86.cpp
src/hotspot/cpu/x86/macroAssembler_x86.hpp
src/hotspot/cpu/x86/nativeInst_x86.hpp
src/hotspot/cpu/x86/relocInfo_x86.cpp
src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp
src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp
src/hotspot/cpu/x86/templateTable_x86.cpp
src/hotspot/cpu/x86/x86_64.ad
src/hotspot/cpu/zero/globals_zero.hpp
src/hotspot/os/aix/os_aix.cpp
src/hotspot/os/aix/safepointMechanism_aix.cpp
src/hotspot/os/bsd/os_bsd.cpp
src/hotspot/os/linux/os_linux.cpp
src/hotspot/os/solaris/os_solaris.cpp
src/hotspot/os/windows/os_windows.cpp
src/hotspot/os_cpu/linux_sparc/thread_linux_sparc.hpp
src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp
src/hotspot/os_cpu/solaris_sparc/thread_solaris_sparc.hpp
src/hotspot/share/interpreter/templateInterpreter.hpp
src/hotspot/share/jvmci/jvmciCodeInstaller.cpp
src/hotspot/share/logging/logTag.hpp
src/hotspot/share/opto/parse1.cpp
src/hotspot/share/prims/whitebox.cpp
src/hotspot/share/runtime/arguments.cpp
src/hotspot/share/runtime/commandLineFlagConstraintsRuntime.cpp
src/hotspot/share/runtime/commandLineFlagConstraintsRuntime.hpp
src/hotspot/share/runtime/globals.hpp
src/hotspot/share/runtime/handshake.cpp
src/hotspot/share/runtime/handshake.hpp
src/hotspot/share/runtime/interfaceSupport.hpp
src/hotspot/share/runtime/mutex.cpp
src/hotspot/share/runtime/objectMonitor.cpp
src/hotspot/share/runtime/safepoint.cpp
src/hotspot/share/runtime/safepoint.hpp
src/hotspot/share/runtime/safepointMechanism.cpp
src/hotspot/share/runtime/safepointMechanism.hpp
src/hotspot/share/runtime/safepointMechanism.inline.hpp
src/hotspot/share/runtime/thread.cpp
src/hotspot/share/runtime/thread.hpp
src/hotspot/share/runtime/thread.inline.hpp
src/hotspot/share/runtime/timer.cpp
src/hotspot/share/runtime/timer.hpp
src/hotspot/share/runtime/vm_operations.hpp
test/hotspot/jtreg/TEST.groups
test/hotspot/jtreg/runtime/handshake/HandshakeTransitionTest.java
test/hotspot/jtreg/runtime/handshake/HandshakeWalkExitTest.java
test/hotspot/jtreg/runtime/handshake/HandshakeWalkStackFallbackTest.java
test/hotspot/jtreg/runtime/handshake/HandshakeWalkStackTest.java
test/hotspot/jtreg/runtime/handshake/libHandshakeTransitionTest.c
test/lib/sun/hotspot/WhiteBox.java
--- a/make/test/JtregNativeHotspot.gmk	Sat Nov 11 01:21:09 2017 +0100
+++ b/make/test/JtregNativeHotspot.gmk	Thu Aug 31 10:00:28 2017 +0200
@@ -60,6 +60,7 @@
     $(TOPDIR)/test/hotspot/jtreg/runtime/SameObject \
     $(TOPDIR)/test/hotspot/jtreg/runtime/BoolReturn \
     $(TOPDIR)/test/hotspot/jtreg/runtime/noClassDefFoundMsg \
+    $(TOPDIR)/test/hotspot/jtreg/runtime/handshake \
     $(TOPDIR)/test/hotspot/jtreg/runtime/RedefineTests \
     $(TOPDIR)/test/hotspot/jtreg/compiler/floatingpoint/ \
     $(TOPDIR)/test/hotspot/jtreg/compiler/calls \
@@ -108,6 +109,7 @@
     BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libMAAThreadStart := -lc
     BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libAllowedFunctions := -lc
     BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libRedefineDoubleDelete := -lc
+    BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libHandshakeTransitionTest := -lc
 endif
 
 ifeq ($(OPENJDK_TARGET_OS), linux)
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -79,6 +79,8 @@
 // Clear short arrays bigger than one word in an arch-specific way
 define_pd_global(intx, InitArrayShortSize, BytesPerLong);
 
+define_pd_global(bool, ThreadLocalHandshakes, false);
+
 #if defined(COMPILER1) || defined(COMPILER2)
 define_pd_global(intx, InlineSmallCode,          1000);
 #endif
--- a/src/hotspot/cpu/arm/globals_arm.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/arm/globals_arm.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -79,6 +79,8 @@
 
 define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
 
+define_pd_global(bool, ThreadLocalHandshakes, false);
+
 #define ARCH_FLAGS(develop, \
                    product, \
                    diagnostic, \
--- a/src/hotspot/cpu/ppc/globals_ppc.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/ppc/globals_ppc.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -83,6 +83,8 @@
 // 2x unrolled loop is shorter with more than 9 HeapWords.
 define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
 
+define_pd_global(bool, ThreadLocalHandshakes, false);
+
 // Platform dependent flag handling: flags only defined on this platform.
 #define ARCH_FLAGS(develop, \
                    product, \
--- a/src/hotspot/cpu/s390/globals_s390.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/s390/globals_s390.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -85,6 +85,8 @@
 // 8146801 (Short Array Allocation): No performance work done here yet.
 define_pd_global(intx, InitArrayShortSize, 1*BytesPerLong);
 
+define_pd_global(bool, ThreadLocalHandshakes, false);
+
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint, writeable) \
                                                                               \
   /* Reoptimize code-sequences of calls at runtime, e.g. replace an */        \
--- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -35,6 +35,7 @@
 #include "gc/shared/collectedHeap.hpp"
 #include "nativeInst_sparc.hpp"
 #include "oops/objArrayKlass.hpp"
+#include "runtime/safepointMechanism.inline.hpp"
 #include "runtime/sharedRuntime.hpp"
 
 #define __ _masm->
@@ -1415,7 +1416,11 @@
   if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
     __ reserved_stack_check();
   }
-  __ set((intptr_t)os::get_polling_page(), L0);
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    __ ld_ptr(Address(G2_thread, Thread::polling_page_offset()), L0);
+  } else {
+    __ set((intptr_t)os::get_polling_page(), L0);
+  }
   __ relocate(relocInfo::poll_return_type);
   __ ld_ptr(L0, 0, G0);
   __ ret();
@@ -1424,11 +1429,16 @@
 
 
 int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
-  __ set((intptr_t)os::get_polling_page(), tmp->as_register());
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    __ ld_ptr(Address(G2_thread, Thread::polling_page_offset()), tmp->as_register());
+  } else {
+    __ set((intptr_t)os::get_polling_page(), tmp->as_register());
+  }
   if (info != NULL) {
     add_debug_info_for_branch(info);
   }
   int offset = __ offset();
+
   __ relocate(relocInfo::poll_type);
   __ ld_ptr(tmp->as_register(), 0, G0);
   return offset;
--- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -33,6 +33,7 @@
 #include "ci/ciArray.hpp"
 #include "ci/ciObjArrayKlass.hpp"
 #include "ci/ciTypeArrayKlass.hpp"
+#include "runtime/safepointMechanism.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "vmreg_sparc.inline.hpp"
@@ -1304,7 +1305,7 @@
   if (x->is_safepoint()) {
     // increment backedge counter if needed
     increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci());
-    __ safepoint(new_register(T_INT), state_for(x, x->state_before()));
+    __ safepoint(safepoint_poll_register(), state_for(x, x->state_before()));
   }
 
   __ cmp(lir_cond(cond), left, right);
--- a/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -52,4 +52,7 @@
 #define SUPPORT_RESERVED_STACK_AREA
 #endif
 
+// SPARC has implemented thread-local polling
+#define THREAD_LOCAL_POLL
+
 #endif // CPU_SPARC_VM_GLOBALDEFINITIONS_SPARC_HPP
--- a/src/hotspot/cpu/sparc/globals_sparc.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/sparc/globals_sparc.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -87,6 +87,8 @@
 
 define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
 
+define_pd_global(bool, ThreadLocalHandshakes, true);
+
 #define ARCH_FLAGS(develop, \
                    product, \
                    diagnostic, \
--- a/src/hotspot/cpu/sparc/interp_masm_sparc.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/sparc/interp_masm_sparc.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -36,6 +36,7 @@
 #include "prims/jvmtiThreadState.hpp"
 #include "runtime/basicLock.hpp"
 #include "runtime/biasedLocking.hpp"
+#include "runtime/safepointMechanism.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/thread.inline.hpp"
 #include "utilities/align.hpp"
@@ -95,12 +96,11 @@
   else                delayed()->nop();
 }
 
-
-void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr) {
+void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr, bool generate_poll) {
   // %%%% consider branching to a single shared dispatch stub (for each bcp_incr)
   assert_not_delayed();
   ldub( Lbcp, bcp_incr, Lbyte_code);               // load next bytecode
-  dispatch_Lbyte_code(state, Interpreter::dispatch_table(state), bcp_incr);
+  dispatch_Lbyte_code(state, Interpreter::dispatch_table(state), bcp_incr, true, generate_poll);
 }
 
 
@@ -261,15 +261,34 @@
 // common code to dispatch and dispatch_only
 // dispatch value in Lbyte_code and increment Lbcp
 
-void InterpreterMacroAssembler::dispatch_Lbyte_code(TosState state, address* table, int bcp_incr, bool verify) {
+void InterpreterMacroAssembler::dispatch_Lbyte_code(TosState state, address* table, int bcp_incr, bool verify, bool generate_poll) {
   verify_FPU(1, state);
   // %%%%% maybe implement +VerifyActivationFrameSize here
   //verify_thread(); //too slow; we will just verify on method entry & exit
   if (verify) interp_verify_oop(Otos_i, state, __FILE__, __LINE__);
   // dispatch table to use
   AddressLiteral tbl(table);
+  Label dispatch;
+
+  if (SafepointMechanism::uses_thread_local_poll() && generate_poll) {
+    AddressLiteral sfpt_tbl(Interpreter::safept_table(state));
+    Label no_safepoint;
+
+    if (tbl.value() != sfpt_tbl.value()) {
+      ldx(Address(G2_thread, Thread::polling_page_offset()), G3_scratch, 0);
+      // An armed page has the poll bit set; if the poll bit is clear, just continue.
+      and3(G3_scratch, SafepointMechanism::poll_bit(), G3_scratch);
+
+      br_null_short(G3_scratch, Assembler::pt, no_safepoint);
+      set(sfpt_tbl, G3_scratch);
+      ba_short(dispatch);
+    }
+    bind(no_safepoint);
+  }
+
+  set(tbl, G3_scratch);                               // compute addr of table
+  bind(dispatch);
   sll(Lbyte_code, LogBytesPerWord, Lbyte_code);       // multiply by wordSize
-  set(tbl, G3_scratch);                               // compute addr of table
   ld_ptr(G3_scratch, Lbyte_code, G3_scratch);         // get entry addr
   jmp( G3_scratch, 0 );
   if (bcp_incr != 0)  delayed()->inc(Lbcp, bcp_incr);
--- a/src/hotspot/cpu/sparc/interp_masm_sparc.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/sparc/interp_masm_sparc.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -98,7 +98,7 @@
   void dispatch_epilog(TosState state, int step = 0);
   void dispatch_only(TosState state);
   void dispatch_normal(TosState state);
-  void dispatch_next(TosState state, int step = 0);
+  void dispatch_next(TosState state, int step = 0, bool generate_poll = false);
   void dispatch_next_noverify_oop(TosState state, int step = 0);
   void dispatch_via (TosState state, address* table);
 
@@ -113,7 +113,7 @@
                          bool install_monitor_exception = true);
 
  protected:
-  void dispatch_Lbyte_code(TosState state, address* table, int bcp_incr = 0, bool verify = true);
+  void dispatch_Lbyte_code(TosState state, address* table, int bcp_incr = 0, bool verify = true, bool generate_poll = false);
 
  public:
   // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -37,6 +37,8 @@
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/objectMonitor.hpp"
 #include "runtime/os.inline.hpp"
+#include "runtime/safepoint.hpp"
+#include "runtime/safepointMechanism.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "utilities/align.hpp"
@@ -236,6 +238,20 @@
 }
 
 
+void MacroAssembler::safepoint_poll(Label& slow_path, bool a, Register thread_reg, Register temp_reg) {
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    ldx(Address(thread_reg, Thread::polling_page_offset()), temp_reg, 0);
+    // Armed page has poll bit set.
+    and3(temp_reg, SafepointMechanism::poll_bit(), temp_reg);
+    br_notnull(temp_reg, a, Assembler::pn, slow_path);
+  } else {
+    AddressLiteral sync_state(SafepointSynchronize::address_of_state());
+
+    load_contents(sync_state, temp_reg);
+    cmp(temp_reg, SafepointSynchronize::_not_synchronized);
+    br(Assembler::notEqual, a, Assembler::pn, slow_path);
+  }
+}
 
 void MacroAssembler::enter() {
   Unimplemented();
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -986,6 +986,8 @@
   // Support for serializing memory accesses between threads
   void serialize_memory(Register thread, Register tmp1, Register tmp2);
 
+  void safepoint_poll(Label& slow_path, bool a, Register thread_reg, Register temp_reg);
+
   // Stack frame creation/removal
   void enter();
   void leave();
--- a/src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -2359,7 +2359,6 @@
   // Block, if necessary, before resuming in _thread_in_Java state.
   // In order for GC to work, don't clear the last_Java_sp until after blocking.
   { Label no_block;
-    AddressLiteral sync_state(SafepointSynchronize::address_of_state());
 
     // Switch thread to "native transition" state before reading the synchronization state.
     // This additional state is necessary because reading and testing the synchronization
@@ -2382,12 +2381,10 @@
         __ serialize_memory(G2_thread, G1_scratch, G3_scratch);
       }
     }
-    __ load_contents(sync_state, G3_scratch);
-    __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized);
 
     Label L;
     Address suspend_state(G2_thread, JavaThread::suspend_flags_offset());
-    __ br(Assembler::notEqual, false, Assembler::pn, L);
+    __ safepoint_poll(L, false, G2_thread, G3_scratch);
     __ delayed()->ld(suspend_state, G3_scratch);
     __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block);
     __ bind(L);
@@ -3118,7 +3115,7 @@
   } else {
     // Make it look like we were called via the poll
     // so that frame constructor always sees a valid return address
-    __ ld_ptr(G2_thread, in_bytes(JavaThread::saved_exception_pc_offset()), O7);
+    __ ld_ptr(Address(G2_thread, JavaThread::saved_exception_pc_offset()), O7);
     __ sub(O7, frame::pc_return_offset, O7);
   }
 
@@ -3127,6 +3124,15 @@
   // setup last_Java_sp (blows G4)
   __ set_last_Java_frame(SP, noreg);
 
+  Register saved_O7 = O7->after_save();
+  if (!cause_return && SafepointMechanism::uses_thread_local_poll()) {
+    // Keep a copy of the return pc in L0 to detect if it gets modified
+    __ mov(saved_O7, L0);
+    // Adjust and keep a copy of our npc saved by the signal handler
+    __ ld_ptr(Address(G2_thread, JavaThread::saved_exception_npc_offset()), L1);
+    __ sub(L1, frame::pc_return_offset, L1);
+  }
+
   // call into the runtime to handle illegal instructions exception
   // Do not use call_VM_leaf, because we need to make a GC map at this call site.
   __ mov(G2_thread, O0);
@@ -3150,6 +3156,12 @@
   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
   __ br_notnull_short(O1, Assembler::pn, pending);
 
+  if (!cause_return && SafepointMechanism::uses_thread_local_poll()) {
+    // If nobody modified our return pc then we must return to the npc which we saved in L1
+    __ cmp(saved_O7, L0);
+    __ movcc(Assembler::equal, false, Assembler::ptr_cc, L1, saved_O7);
+  }
+
   RegisterSaver::restore_live_registers(masm);
 
   // We are back the the original state on entry and ready to go.
--- a/src/hotspot/cpu/sparc/sparc.ad	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/sparc/sparc.ad	Thu Aug 31 10:00:28 2017 +0200
@@ -1206,7 +1206,11 @@
   Compile* C = ra_->C;
 
   if(do_polling() && ra_->C->is_method_compilation()) {
-    st->print("SETHI  #PollAddr,L0\t! Load Polling address\n\t");
+    if (SafepointMechanism::uses_global_page_poll()) {
+      st->print("SETHI  #PollAddr,L0\t! Load Polling address\n\t");
+    } else {
+      st->print("LDX    [R_G2 + #poll_offset],L0\t! Load local polling address\n\t");
+    }
     st->print("LDX    [L0],G0\t!Poll for Safepointing\n\t");
   }
 
@@ -1233,8 +1237,12 @@
 
   // If this does safepoint polling, then do it here
   if(do_polling() && ra_->C->is_method_compilation()) {
-    AddressLiteral polling_page(os::get_polling_page());
-    __ sethi(polling_page, L0);
+    if (SafepointMechanism::uses_thread_local_poll()) {
+      __ ld_ptr(Address(G2_thread, Thread::polling_page_offset()), L0);
+    } else {
+      AddressLiteral polling_page(os::get_polling_page());
+      __ sethi(polling_page, L0);
+    }
     __ relocate(relocInfo::poll_return_type);
     __ ld_ptr(L0, 0, G0);
   }
@@ -1266,6 +1274,7 @@
 }
 
 int MachEpilogNode::safepoint_offset() const {
+  assert(SafepointMechanism::uses_global_page_poll(), "sanity");
   assert( do_polling(), "no return for this epilog node");
   return MacroAssembler::insts_for_sethi(os::get_polling_page()) * BytesPerInstWord;
 }
--- a/src/hotspot/cpu/sparc/templateInterpreterGenerator_sparc.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/sparc/templateInterpreterGenerator_sparc.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -912,10 +912,8 @@
 
     Label L_slow_path;
     // If we need a safepoint check, generate full interpreter entry.
-    ExternalAddress state(SafepointSynchronize::address_of_state());
-    __ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2);
-    __ set(SafepointSynchronize::_not_synchronized, O3);
-    __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path);
+    __ safepoint_poll(L_slow_path, false, G2_thread, O2);
+    __ delayed()->nop();
 
     // Load parameters
     const Register crc   = O0; // initial crc
@@ -956,10 +954,9 @@
 
     Label L_slow_path;
     // If we need a safepoint check, generate full interpreter entry.
-    ExternalAddress state(SafepointSynchronize::address_of_state());
-    __ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2);
-    __ set(SafepointSynchronize::_not_synchronized, O3);
-    __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path);
+
+    __ safepoint_poll(L_slow_path, false, G2_thread, O2);
+    __ delayed()->nop();
 
     // Load parameters from the stack
     const Register crc    = O0; // initial crc
@@ -1397,7 +1394,6 @@
   // Block, if necessary, before resuming in _thread_in_Java state.
   // In order for GC to work, don't clear the last_Java_sp until after blocking.
   { Label no_block;
-    AddressLiteral sync_state(SafepointSynchronize::address_of_state());
 
     // Switch thread to "native transition" state before reading the synchronization state.
     // This additional state is necessary because reading and testing the synchronization
@@ -1420,11 +1416,9 @@
         __ serialize_memory(G2_thread, G1_scratch, G3_scratch);
       }
     }
-    __ load_contents(sync_state, G3_scratch);
-    __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized);
 
     Label L;
-    __ br(Assembler::notEqual, false, Assembler::pn, L);
+    __ safepoint_poll(L, false, G2_thread, G3_scratch);
     __ delayed()->ld(G2_thread, JavaThread::suspend_flags_offset(), G3_scratch);
     __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block);
     __ bind(L);
--- a/src/hotspot/cpu/sparc/templateTable_sparc.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/sparc/templateTable_sparc.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1499,7 +1499,7 @@
     // Push returnAddress for "ret" on stack
     __ push_ptr(Otos_i);
     // And away we go!
-    __ dispatch_next(vtos);
+    __ dispatch_next(vtos, 0, true);
     return;
   }
 
@@ -1607,7 +1607,7 @@
   // continue with bytecode @ target
   // %%%%% Like Intel, could speed things up by moving bytecode fetch to code above,
   // %%%%% and changing dispatch_next to dispatch_only
-  __ dispatch_next(vtos);
+  __ dispatch_next(vtos, 0, true);
 }
 
 
@@ -1676,7 +1676,7 @@
   __ ld_ptr(Lmethod, Method::const_offset(), G3_scratch);
   __ add(G3_scratch, Otos_i, G3_scratch);
   __ add(G3_scratch, in_bytes(ConstMethod::codes_offset()), Lbcp);
-  __ dispatch_next(vtos);
+  __ dispatch_next(vtos, 0, true);
 }
 
 
@@ -1691,7 +1691,7 @@
   __ ld_ptr(Lmethod, Method::const_offset(), G3_scratch);
   __ add(G3_scratch, Otos_i, G3_scratch);
   __ add(G3_scratch, in_bytes(ConstMethod::codes_offset()), Lbcp);
-  __ dispatch_next(vtos);
+  __ dispatch_next(vtos, 0, true);
 }
 
 
@@ -1727,7 +1727,7 @@
   // continue execution
   __ bind(continue_execution);
   __ add(Lbcp, O2, Lbcp);
-  __ dispatch_next(vtos);
+  __ dispatch_next(vtos, 0, true);
 }
 
 
@@ -1779,7 +1779,7 @@
     __ bind(continue_execution);
   }
   __ add(Lbcp, O4, Lbcp);
-  __ dispatch_next(vtos);
+  __ dispatch_next(vtos, 0, true);
 }
 
 
@@ -1888,7 +1888,7 @@
 
   __ bind(continue_execution);
   __ add( Lbcp, Rj, Lbcp );
-  __ dispatch_next( vtos );
+  __ dispatch_next(vtos, 0, true);
 }
 
 
@@ -1914,6 +1914,18 @@
     __ bind(skip_register_finalizer);
   }
 
+  if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) {
+    Label no_safepoint;
+    __ ldx(Address(G2_thread, Thread::polling_page_offset()), G3_scratch, 0);
+    __ btst(SafepointMechanism::poll_bit(), G3_scratch);
+    __ br(Assembler::zero, false, Assembler::pt, no_safepoint);
+    __ delayed()->nop();
+    __ push(state);
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint));
+    __ pop(state);
+    __ bind(no_safepoint);
+  }
+
   // Narrow result if state is itos but result type is smaller.
   // Need to narrow in the return bytecode rather than in generate_return_entry
   // since compiled code callers expect the result to already be narrowed.
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -526,32 +526,57 @@
 
   // Note: we do not need to round double result; float result has the right precision
   // the poll sets the condition code, but no data registers
-  AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
-
-  if (Assembler::is_polling_page_far()) {
-    __ lea(rscratch1, polling_page);
+
+  if (SafepointMechanism::uses_thread_local_poll()) {
+#ifdef _LP64
+    __ movptr(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
     __ relocate(relocInfo::poll_return_type);
     __ testl(rax, Address(rscratch1, 0));
+#else
+    ShouldNotReachHere();
+#endif
   } else {
-    __ testl(rax, polling_page);
+    AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
+
+    if (Assembler::is_polling_page_far()) {
+      __ lea(rscratch1, polling_page);
+      __ relocate(relocInfo::poll_return_type);
+      __ testl(rax, Address(rscratch1, 0));
+    } else {
+      __ testl(rax, polling_page);
+    }
   }
   __ ret(0);
 }
 
 
 int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
-  AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);
   guarantee(info != NULL, "Shouldn't be NULL");
   int offset = __ offset();
-  if (Assembler::is_polling_page_far()) {
-    __ lea(rscratch1, polling_page);
-    offset = __ offset();
+  if (SafepointMechanism::uses_thread_local_poll()) {
+#ifdef _LP64
+    __ movptr(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
     add_debug_info_for_branch(info);
     __ relocate(relocInfo::poll_type);
+    address pre_pc = __ pc();
     __ testl(rax, Address(rscratch1, 0));
+    address post_pc = __ pc();
+    guarantee(pointer_delta(post_pc, pre_pc, 1) == 3, "must be exact length");
+#else
+    ShouldNotReachHere();
+#endif
   } else {
-    add_debug_info_for_branch(info);
-    __ testl(rax, polling_page);
+    AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);
+    if (Assembler::is_polling_page_far()) {
+      __ lea(rscratch1, polling_page);
+      offset = __ offset();
+      add_debug_info_for_branch(info);
+      __ relocate(relocInfo::poll_type);
+      __ testl(rax, Address(rscratch1, 0));
+    } else {
+      add_debug_info_for_branch(info);
+      __ testl(rax, polling_page);
+    }
   }
   return offset;
 }
--- a/src/hotspot/cpu/x86/globalDefinitions_x86.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/globalDefinitions_x86.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -65,4 +65,9 @@
 #define SUPPORT_RESERVED_STACK_AREA
 #endif
 
+#ifdef _LP64
+// x64 has implemented thread-local polling
+#define THREAD_LOCAL_POLL
+#endif
+
 #endif // CPU_X86_VM_GLOBALDEFINITIONS_X86_HPP
--- a/src/hotspot/cpu/x86/globals_x86.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/globals_x86.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -97,6 +97,12 @@
 
 define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
 
+#ifdef _LP64
+define_pd_global(bool, ThreadLocalHandshakes, true);
+#else
+define_pd_global(bool, ThreadLocalHandshakes, false);
+#endif
+
 #define ARCH_FLAGS(develop, \
                    product, \
                    diagnostic, \
--- a/src/hotspot/cpu/x86/interp_masm_x86.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -35,6 +35,7 @@
 #include "prims/jvmtiThreadState.hpp"
 #include "runtime/basicLock.hpp"
 #include "runtime/biasedLocking.hpp"
+#include "runtime/safepointMechanism.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/thread.inline.hpp"
 
@@ -809,7 +810,8 @@
 
 void InterpreterMacroAssembler::dispatch_base(TosState state,
                                               address* table,
-                                              bool verifyoop) {
+                                              bool verifyoop,
+                                              bool generate_poll) {
   verify_FPU(1, state);
   if (VerifyActivationFrameSize) {
     Label L;
@@ -827,8 +829,24 @@
     verify_oop(rax, state);
   }
 #ifdef _LP64
+
+  Label no_safepoint, dispatch;
+  address* const safepoint_table = Interpreter::safept_table(state);
+  if (SafepointMechanism::uses_thread_local_poll() && table != safepoint_table && generate_poll) {
+    NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
+
+    testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+
+    jccb(Assembler::zero, no_safepoint);
+    lea(rscratch1, ExternalAddress((address)safepoint_table));
+    jmpb(dispatch);
+  }
+
+  bind(no_safepoint);
   lea(rscratch1, ExternalAddress((address)table));
+  bind(dispatch);
   jmp(Address(rscratch1, rbx, Address::times_8));
+
 #else
   Address index(noreg, rbx, Address::times_ptr);
   ExternalAddress tbl((address)table);
@@ -837,8 +855,8 @@
 #endif // _LP64
 }
 
-void InterpreterMacroAssembler::dispatch_only(TosState state) {
-  dispatch_base(state, Interpreter::dispatch_table(state));
+void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) {
+  dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);
 }
 
 void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
@@ -850,12 +868,12 @@
 }
 
 
-void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
+void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {
   // load next bytecode (load before advancing _bcp_register to prevent AGI)
   load_unsigned_byte(rbx, Address(_bcp_register, step));
   // advance _bcp_register
   increment(_bcp_register, step);
-  dispatch_base(state, Interpreter::dispatch_table(state));
+  dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);
 }
 
 void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
--- a/src/hotspot/cpu/x86/interp_masm_x86.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/interp_masm_x86.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -49,7 +49,7 @@
                             bool check_exceptions);
 
   // base routine for all dispatches
-  void dispatch_base(TosState state, address* table, bool verifyoop = true);
+  void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false);
 
  public:
   InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code),
@@ -184,12 +184,12 @@
   void dispatch_prolog(TosState state, int step = 0);
   void dispatch_epilog(TosState state, int step = 0);
   // dispatch via rbx (assume rbx is loaded already)
-  void dispatch_only(TosState state);
+  void dispatch_only(TosState state, bool generate_poll = false);
   // dispatch normal table via rbx (assume rbx is loaded already)
   void dispatch_only_normal(TosState state);
   void dispatch_only_noverify(TosState state);
   // load rbx from [_bcp_register + step] and dispatch via rbx
-  void dispatch_next(TosState state, int step = 0);
+  void dispatch_next(TosState state, int step = 0, bool generate_poll = false);
   // load rbx from [_bcp_register] and dispatch via rbx and table
   void dispatch_via (TosState state, address* table);
 
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -38,6 +38,8 @@
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/objectMonitor.hpp"
 #include "runtime/os.hpp"
+#include "runtime/safepoint.hpp"
+#include "runtime/safepointMechanism.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/thread.hpp"
@@ -3759,6 +3761,25 @@
   movl(as_Address(ArrayAddress(page, index)), tmp);
 }
 
+#ifdef _LP64
+// Jumps to slow_path if thread_reg's poll bit is set (thread-local poll) or a global safepoint is active; temp_reg is unused.
+void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg) {
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    testb(Address(thread_reg, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+    jcc(Assembler::notZero, slow_path); // handshake bit set implies poll
+  } else {
+    cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), SafepointSynchronize::_not_synchronized);
+    jcc(Assembler::notEqual, slow_path);
+  }
+}
+#else
+// 32-bit variant: jumps to slow_path whenever the global safepoint state is not _not_synchronized.
+void MacroAssembler::safepoint_poll(Label& slow_path) {
+  cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), SafepointSynchronize::_not_synchronized);
+  jcc(Assembler::notEqual, slow_path);
+}
+#endif
+
 // Calls to C land
 //
 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -656,6 +656,12 @@
   // Support for serializing memory accesses between threads
   void serialize_memory(Register thread, Register tmp);
 
+#ifdef _LP64
+  void safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg);
+#else
+  void safepoint_poll(Label& slow_path);
+#endif
+
   void verify_tlab();
 
   // Biased locking support
--- a/src/hotspot/cpu/x86/nativeInst_x86.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/nativeInst_x86.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -29,6 +29,7 @@
 #include "memory/allocation.hpp"
 #include "runtime/icache.hpp"
 #include "runtime/os.hpp"
+#include "runtime/safepointMechanism.hpp"
 
 // We have interfaces for the following instructions:
 // - NativeInstruction
@@ -678,6 +679,7 @@
   enum Intel_specific_constants {
     instruction_rex_prefix_mask = 0xF0,
     instruction_rex_prefix      = Assembler::REX,
+    instruction_rex_b_prefix    = Assembler::REX_B,
     instruction_code_memXregl   = 0x85,
     modrm_mask                  = 0x38, // select reg from the ModRM byte
     modrm_reg                   = 0x00  // rax
@@ -703,6 +705,16 @@
                                                           (ubyte_at(0) & 0xF0) == 0x70;  /* short jump */ }
 inline bool NativeInstruction::is_safepoint_poll() {
 #ifdef AMD64
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    // We know that the poll must have a REX_B prefix since we enforce its source to be
+    // a rex-register and the destination to be rax.
+    const bool has_rex_prefix = ubyte_at(0) == NativeTstRegMem::instruction_rex_b_prefix;
+    const bool is_test_opcode = ubyte_at(1) == NativeTstRegMem::instruction_code_memXregl;
+    const bool is_rax_target = (ubyte_at(2) & NativeTstRegMem::modrm_mask) == NativeTstRegMem::modrm_reg;
+    if (has_rex_prefix && is_test_opcode && is_rax_target) {
+      return true;
+    }
+  }
   // Try decoding a near safepoint first:
   if (ubyte_at(0) == NativeTstRegMem::instruction_code_memXregl &&
       ubyte_at(1) == 0x05) { // 00 rax 101
--- a/src/hotspot/cpu/x86/relocInfo_x86.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/relocInfo_x86.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 #include "oops/klass.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/safepoint.hpp"
+#include "runtime/safepointMechanism.hpp"
 
 
 void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
@@ -183,9 +184,12 @@
   typedef Assembler::WhichOperand WhichOperand;
   WhichOperand which = (WhichOperand) format();
 #if !INCLUDE_JVMCI
-  assert((which == Assembler::disp32_operand) == !Assembler::is_polling_page_far(), "format not set correctly");
+  if (SafepointMechanism::uses_global_page_poll()) {
+    assert((which == Assembler::disp32_operand) == !Assembler::is_polling_page_far(), "format not set correctly");
+  }
 #endif
   if (which == Assembler::disp32_operand) {
+    assert(SafepointMechanism::uses_global_page_poll(), "should only have generated such a poll if global polling enabled");
     address orig_addr = old_addr_for(addr(), src, dest);
     NativeInstruction* oni = nativeInstruction_at(orig_addr);
     int32_t* orig_disp = (int32_t*) Assembler::locate_operand(orig_addr, which);
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -30,6 +30,7 @@
 #include "asm/macroAssembler.inline.hpp"
 #include "code/debugInfoRec.hpp"
 #include "code/icBuffer.hpp"
+#include "code/nativeInst.hpp"
 #include "code/vtableStubs.hpp"
 #include "interpreter/interpreter.hpp"
 #include "logging/log.hpp"
@@ -2474,15 +2475,13 @@
   // check for safepoint operation in progress and/or pending suspend requests
   {
     Label Continue;
-
-    __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
-             SafepointSynchronize::_not_synchronized);
-
-    Label L;
-    __ jcc(Assembler::notEqual, L);
+    Label slow_path;
+
+    __ safepoint_poll(slow_path, r15_thread, rscratch1);
+
     __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
     __ jcc(Assembler::equal, Continue);
-    __ bind(L);
+    __ bind(slow_path);
 
     // Don't use call_VM as it will see a possible pending exception and forward it
     // and never return here preventing us from clearing _last_native_pc down below.
@@ -3355,9 +3354,11 @@
   // sees an invalid pc.
 
   if (!cause_return) {
-    // overwrite the dummy value we pushed on entry
-    __ movptr(c_rarg0, Address(r15_thread, JavaThread::saved_exception_pc_offset()));
-    __ movptr(Address(rbp, wordSize), c_rarg0);
+    // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack.
+    // Additionally, rbx is a callee saved register and we can look at it later to determine
+    // if someone changed the return address for us!
+    __ movptr(rbx, Address(r15_thread, JavaThread::saved_exception_pc_offset()));
+    __ movptr(Address(rbp, wordSize), rbx);
   }
 
   // Do the call
@@ -3387,11 +3388,38 @@
   // No exception case
   __ bind(noException);
 
+  Label no_adjust, bail;
+  if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
+    // If our stashed return pc was modified by the runtime we avoid touching it
+    __ cmpptr(rbx, Address(rbp, wordSize));
+    __ jccb(Assembler::notEqual, no_adjust);
+
+#ifdef ASSERT
+    // Verify the correct encoding of the poll we're about to skip.
+    // See NativeInstruction::is_safepoint_poll()
+    __ cmpb(Address(rbx, 0), NativeTstRegMem::instruction_rex_b_prefix);
+    __ jcc(Assembler::notEqual, bail);
+    __ cmpb(Address(rbx, 1), NativeTstRegMem::instruction_code_memXregl);
+    __ jcc(Assembler::notEqual, bail);
+    // Mask out the modrm bits
+    __ testb(Address(rbx, 2), NativeTstRegMem::modrm_mask);
+    // rax encodes to 0, so if the bits are nonzero it's incorrect
+    __ jcc(Assembler::notZero, bail);
+#endif
+    // Adjust return pc forward to step over the safepoint poll instruction
+    __ addptr(Address(rbp, wordSize), 3);
+  }
+
+  __ bind(no_adjust);
   // Normal exit, restore registers and exit.
   RegisterSaver::restore_live_registers(masm, save_vectors);
-
   __ ret(0);
 
+#ifdef ASSERT
+  __ bind(bail);
+  __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
+#endif
+
   // Make sure all code is generated
   masm->flush();
 
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1141,14 +1141,17 @@
   // check for safepoint operation in progress and/or pending suspend requests
   {
     Label Continue;
-    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
-             SafepointSynchronize::_not_synchronized);
+    Label slow_path;
 
-    Label L;
-    __ jcc(Assembler::notEqual, L);
+#ifndef _LP64
+    __ safepoint_poll(slow_path);
+#else
+    __ safepoint_poll(slow_path, r15_thread, rscratch1);
+#endif
+
     __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
     __ jcc(Assembler::equal, Continue);
-    __ bind(L);
+    __ bind(slow_path);
 
     // Don't use call_VM as it will see a possible pending exception
     // and forward it and never return here preventing us from
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -190,11 +190,7 @@
     // c_rarg1: scratch (rsi on non-Win64, rdx on Win64)
 
     Label slow_path;
-    // If we need a safepoint check, generate full interpreter entry.
-    ExternalAddress state(SafepointSynchronize::address_of_state());
-    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
-             SafepointSynchronize::_not_synchronized);
-    __ jcc(Assembler::notEqual, slow_path);
+    __ safepoint_poll(slow_path, r15_thread, rscratch1);
 
     // We don't generate local frame and don't align stack because
     // we call stub code and there is no safepoint on this path.
@@ -240,11 +236,7 @@
     // r13: senderSP must preserved for slow path, set SP to it on fast path
 
     Label slow_path;
-    // If we need a safepoint check, generate full interpreter entry.
-    ExternalAddress state(SafepointSynchronize::address_of_state());
-    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
-             SafepointSynchronize::_not_synchronized);
-    __ jcc(Assembler::notEqual, slow_path);
+    __ safepoint_poll(slow_path, r15_thread, rscratch1);
 
     // We don't generate local frame and don't align stack because
     // we call stub code and there is no safepoint on this path.
--- a/src/hotspot/cpu/x86/templateTable_x86.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/templateTable_x86.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -2084,7 +2084,7 @@
     __ addptr(rbcp, rdx);
     // jsr returns atos that is not an oop
     __ push_i(rax);
-    __ dispatch_only(vtos);
+    __ dispatch_only(vtos, true);
     return;
   }
 
@@ -2203,7 +2203,7 @@
   // rax: return bci for jsr's, unused otherwise
   // rbx: target bytecode
   // r13: target bcp
-  __ dispatch_only(vtos);
+  __ dispatch_only(vtos, true);
 
   if (UseLoopCounter) {
     if (ProfileInterpreter) {
@@ -2332,7 +2332,7 @@
   __ movptr(rbcp, Address(rax, Method::const_offset()));
   __ lea(rbcp, Address(rbcp, rbx, Address::times_1,
                       ConstMethod::codes_offset()));
-  __ dispatch_next(vtos);
+  __ dispatch_next(vtos, 0, true);
 }
 
 void TemplateTable::wide_ret() {
@@ -2343,7 +2343,7 @@
   __ get_method(rax);
   __ movptr(rbcp, Address(rax, Method::const_offset()));
   __ lea(rbcp, Address(rbcp, rbx, Address::times_1, ConstMethod::codes_offset()));
-  __ dispatch_next(vtos);
+  __ dispatch_next(vtos, 0, true);
 }
 
 void TemplateTable::tableswitch() {
@@ -2373,7 +2373,7 @@
   LP64_ONLY(__ movl2ptr(rdx, rdx));
   __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1));
   __ addptr(rbcp, rdx);
-  __ dispatch_only(vtos);
+  __ dispatch_only(vtos, true);
   // handle default
   __ bind(default_case);
   __ profile_switch_default(rax);
@@ -2421,7 +2421,7 @@
   __ movl2ptr(rdx, rdx);
   __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1));
   __ addptr(rbcp, rdx);
-  __ dispatch_only(vtos);
+  __ dispatch_only(vtos, true);
 }
 
 void TemplateTable::fast_binaryswitch() {
@@ -2525,7 +2525,7 @@
 
   __ load_unsigned_byte(rbx, Address(rbcp, j, Address::times_1));
   __ addptr(rbcp, j);
-  __ dispatch_only(vtos);
+  __ dispatch_only(vtos, true);
 
   // default case -> j = default offset
   __ bind(default_case);
@@ -2539,7 +2539,7 @@
 
   __ load_unsigned_byte(rbx, Address(rbcp, j, Address::times_1));
   __ addptr(rbcp, j);
-  __ dispatch_only(vtos);
+  __ dispatch_only(vtos, true);
 }
 
 void TemplateTable::_return(TosState state) {
@@ -2570,6 +2570,20 @@
   }
 #endif
 
+#ifdef _LP64
+  if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) {
+    Label no_safepoint;
+    NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll"));
+    __ testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+    __ jcc(Assembler::zero, no_safepoint);
+    __ push(state);
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                    InterpreterRuntime::at_safepoint));
+    __ pop(state);
+    __ bind(no_safepoint);
+  }
+#endif
+
   // Narrow result if state is itos but result type is smaller.
   // Need to narrow in the return bytecode rather than in generate_return_entry
   // since compiled code callers expect the result to already be narrowed.
--- a/src/hotspot/cpu/x86/x86_64.ad	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/x86/x86_64.ad	Thu Aug 31 10:00:28 2017 +0200
@@ -317,6 +317,18 @@
 // Singleton class for TLS pointer
 reg_class ptr_r15_reg(R15, R15_H);
 
+// The registers which can be used for
+// a thread local safepoint poll
+// * R12 is reserved for heap base (and as a base register it would need a SIB byte)
+// * R13 cannot be encoded for addressing without an offset byte
+// * R15 is reserved for the JavaThread
+reg_class ptr_rex_reg(R8,  R8_H,
+                      R9,  R9_H,
+                      R10, R10_H,
+                      R11, R11_H,
+                      R14, R14_H);
+
+
 // Class for all long registers (excluding RSP)
 reg_class long_reg_with_rbp(RAX, RAX_H,
                             RDX, RDX_H,
@@ -566,7 +578,7 @@
 // it does if the polling page is more than disp32 away.
 bool SafePointNode::needs_polling_address_input()
 {
-  return Assembler::is_polling_page_far();
+  return SafepointMechanism::uses_thread_local_poll() || Assembler::is_polling_page_far();
 }
 
 //
@@ -938,7 +950,11 @@
   st->print_cr("popq   rbp");
   if (do_polling() && C->is_method_compilation()) {
     st->print("\t");
-    if (Assembler::is_polling_page_far()) {
+    if (SafepointMechanism::uses_thread_local_poll()) {
+      st->print_cr("movq   rscratch1, poll_offset[r15_thread] #polling_page_address\n\t"
+                   "testl  rax, [rscratch1]\t"
+                   "# Safepoint: poll for GC");
+    } else if (Assembler::is_polling_page_far()) {
       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
                    "testl  rax, [rscratch1]\t"
                    "# Safepoint: poll for GC");
@@ -989,13 +1005,19 @@
 
   if (do_polling() && C->is_method_compilation()) {
     MacroAssembler _masm(&cbuf);
-    AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
-    if (Assembler::is_polling_page_far()) {
-      __ lea(rscratch1, polling_page);
+    if (SafepointMechanism::uses_thread_local_poll()) {
+      __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
       __ relocate(relocInfo::poll_return_type);
       __ testl(rax, Address(rscratch1, 0));
     } else {
-      __ testl(rax, polling_page);
+      AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
+      if (Assembler::is_polling_page_far()) {
+        __ lea(rscratch1, polling_page);
+        __ relocate(relocInfo::poll_return_type);
+        __ testl(rax, Address(rscratch1, 0));
+      } else {
+        __ testl(rax, polling_page);
+      }
     }
   }
 }
@@ -3511,6 +3533,16 @@
   interface(REG_INTER);
 %}
 
+operand rex_RegP()
+%{
+  constraint(ALLOC_IN_RC(ptr_rex_reg));
+  match(RegP);
+  match(rRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 operand rRegL()
 %{
   constraint(ALLOC_IN_RC(long_reg));
@@ -12060,7 +12092,7 @@
 // Safepoint Instructions
 instruct safePoint_poll(rFlagsReg cr)
 %{
-  predicate(!Assembler::is_polling_page_far());
+  predicate(!Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll());
   match(SafePoint);
   effect(KILL cr);
 
@@ -12076,7 +12108,7 @@
 
 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
 %{
-  predicate(Assembler::is_polling_page_far());
+  predicate(Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll());
   match(SafePoint poll);
   effect(KILL cr, USE poll);
 
@@ -12090,6 +12122,26 @@
   ins_pipe(ialu_reg_mem);
 %}
 
+// Thread-local safepoint poll; the poll must be the 3-byte REX.B form, which is checked in all builds.
+instruct safePoint_poll_tls(rFlagsReg cr, rex_RegP poll)
+%{
+  predicate(SafepointMechanism::uses_thread_local_poll());
+  match(SafePoint poll);
+  effect(KILL cr, USE poll);
+
+  format %{ "testl  rax, [$poll]\t"
+            "# Safepoint: poll for GC" %}
+  ins_cost(125);
+  size(3); /* setting an explicit size will cause debug builds to assert if size is incorrect */
+  ins_encode %{
+    __ relocate(relocInfo::poll_type);
+    address pre_pc = __ pc();
+    __ testl(rax, Address($poll$$Register, 0));
+    guarantee(__ pc() - pre_pc == 3 && pre_pc[0] == 0x41 && pre_pc[1] == 0x85, "must emit #rex test-ax [reg]");
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
 // ============================================================================
 // Procedure Call/Return Instructions
 // Call Java Static Instruction
--- a/src/hotspot/cpu/zero/globals_zero.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/cpu/zero/globals_zero.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007, 2008, 2009, 2010, 2011 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -81,6 +81,8 @@
 // No performance work done here yet.
 define_pd_global(bool, CompactStrings, false);
 
+define_pd_global(bool, ThreadLocalHandshakes, false);
+
 #define ARCH_FLAGS(develop, \
                    product, \
                    diagnostic, \
--- a/src/hotspot/os/aix/os_aix.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/os/aix/os_aix.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -3477,75 +3477,6 @@
     LoadedLibraries::print(tty);
   }
 
-  const int page_size = Aix::page_size();
-  const int map_size = page_size;
-
-  address map_address = (address) MAP_FAILED;
-  const int prot  = PROT_READ;
-  const int flags = MAP_PRIVATE|MAP_ANONYMOUS;
-
-  // Use optimized addresses for the polling page,
-  // e.g. map it to a special 32-bit address.
-  if (OptimizePollingPageLocation) {
-    // architecture-specific list of address wishes:
-    address address_wishes[] = {
-      // AIX: addresses lower than 0x30000000 don't seem to work on AIX.
-      // PPC64: all address wishes are non-negative 32 bit values where
-      // the lower 16 bits are all zero. we can load these addresses
-      // with a single ppc_lis instruction.
-      (address) 0x30000000, (address) 0x31000000,
-      (address) 0x32000000, (address) 0x33000000,
-      (address) 0x40000000, (address) 0x41000000,
-      (address) 0x42000000, (address) 0x43000000,
-      (address) 0x50000000, (address) 0x51000000,
-      (address) 0x52000000, (address) 0x53000000,
-      (address) 0x60000000, (address) 0x61000000,
-      (address) 0x62000000, (address) 0x63000000
-    };
-    int address_wishes_length = sizeof(address_wishes)/sizeof(address);
-
-    // iterate over the list of address wishes:
-    for (int i=0; i<address_wishes_length; i++) {
-      // Try to map with current address wish.
-      // AIX: AIX needs MAP_FIXED if we provide an address and mmap will
-      // fail if the address is already mapped.
-      map_address = (address) ::mmap(address_wishes[i] - (ssize_t)page_size,
-                                     map_size, prot,
-                                     flags | MAP_FIXED,
-                                     -1, 0);
-      trcVerbose("SafePoint Polling  Page address: %p (wish) => %p",
-                   address_wishes[i], map_address + (ssize_t)page_size);
-
-      if (map_address + (ssize_t)page_size == address_wishes[i]) {
-        // Map succeeded and map_address is at wished address, exit loop.
-        break;
-      }
-
-      if (map_address != (address) MAP_FAILED) {
-        // Map succeeded, but polling_page is not at wished address, unmap and continue.
-        ::munmap(map_address, map_size);
-        map_address = (address) MAP_FAILED;
-      }
-      // Map failed, continue loop.
-    }
-  } // end OptimizePollingPageLocation
-
-  if (map_address == (address) MAP_FAILED) {
-    map_address = (address) ::mmap(NULL, map_size, prot, flags, -1, 0);
-  }
-  guarantee(map_address != MAP_FAILED, "os::init_2: failed to allocate polling page");
-  os::set_polling_page(map_address);
-
-  if (!UseMembar) {
-    address mem_serialize_page = (address) ::mmap(NULL, Aix::page_size(), PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-    guarantee(mem_serialize_page != NULL, "mmap Failed for memory serialize page");
-    os::set_memory_serialize_page(mem_serialize_page);
-
-    trcVerbose("Memory Serialize  Page address: %p - %p, size %IX (%IB)",
-        mem_serialize_page, mem_serialize_page + Aix::page_size(),
-        Aix::page_size(), Aix::page_size());
-  }
-
   // initialize suspend/resume support - must do this before signal_sets_init()
   if (SR_initialize() != 0) {
     perror("SR_initialize failed");
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/os/aix/safepointMechanism_aix.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "logging/log.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/os.hpp"
+#include "runtime/safepointMechanism.hpp"
+
+// Reserves and commits the safepoint polling page and registers it via os::set_polling_page().
+// os::attempt_reserve_memory_at() and os::reserve_memory() report failure by returning NULL
+// (not mmap's MAP_FAILED), so NULL is the "no mapping" sentinel throughout this function.
+void SafepointMechanism::pd_initialize() {
+  char* map_address = NULL;
+  const size_t page_size = os::vm_page_size();
+  // Use optimized addresses for the polling page,
+  // e.g. map it to a special 32-bit address.
+  if (OptimizePollingPageLocation) {
+    // architecture-specific list of address wishes:
+    char* const address_wishes[] = {
+        // AIX: addresses lower than 0x30000000 don't seem to work on AIX.
+        // PPC64: all address wishes are non-negative 32 bit values where
+        // the lower 16 bits are all zero. we can load these addresses
+        // with a single ppc_lis instruction.
+        (char*) 0x30000000, (char*) 0x31000000,
+        (char*) 0x32000000, (char*) 0x33000000,
+        (char*) 0x40000000, (char*) 0x41000000,
+        (char*) 0x42000000, (char*) 0x43000000,
+        (char*) 0x50000000, (char*) 0x51000000,
+        (char*) 0x52000000, (char*) 0x53000000,
+        (char*) 0x60000000, (char*) 0x61000000,
+        (char*) 0x62000000, (char*) 0x63000000
+    };
+    const int address_wishes_length = sizeof(address_wishes) / sizeof(address_wishes[0]);
+    // iterate over the list of address wishes:
+    for (int i = 0; i < address_wishes_length; i++) {
+      // The page is requested so that it ends exactly at the wished address.
+      char* const wanted = address_wishes[i] - page_size;
+      map_address = os::attempt_reserve_memory_at(page_size, wanted);
+      log_debug(os)("SafePoint Polling  Page address: %p (wish) => %p",
+          address_wishes[i], map_address == NULL ? (char*)NULL : map_address + (ssize_t)page_size);
+      if (map_address == wanted) {
+        // Reservation succeeded at the wished location, exit loop.
+        break;
+      }
+      if (map_address != NULL) {
+        // Reserved, but not where we asked for: give it back and try the next wish.
+        os::release_memory(map_address, page_size);
+        map_address = NULL;
+      }
+      // Reservation failed, continue loop.
+    }
+  }
+  if (map_address == NULL) {
+    // No wished address was usable (or none was tried); accept any page-aligned address.
+    map_address = os::reserve_memory(page_size, NULL, page_size);
+  }
+  guarantee(map_address != NULL, "SafepointMechanism::pd_initialize: failed to allocate polling page");
+  os::commit_memory_or_exit(map_address, page_size, false, "Unable to commit memory for polling page");
+  os::set_polling_page((address)(map_address));
+}
--- a/src/hotspot/os/bsd/os_bsd.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/os/bsd/os_bsd.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -3391,20 +3391,6 @@
 
   os::Posix::init_2();
 
-  // Allocate a single page and mark it as readable for safepoint polling
-  address polling_page = (address) ::mmap(NULL, Bsd::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-  guarantee(polling_page != MAP_FAILED, "os::init_2: failed to allocate polling page");
-
-  os::set_polling_page(polling_page);
-  log_info(os)("SafePoint Polling address: " INTPTR_FORMAT, p2i(polling_page));
-
-  if (!UseMembar) {
-    address mem_serialize_page = (address) ::mmap(NULL, Bsd::page_size(), PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-    guarantee(mem_serialize_page != MAP_FAILED, "mmap Failed for memory serialize page");
-    os::set_memory_serialize_page(mem_serialize_page);
-    log_info(os)("Memory Serialize Page address: " INTPTR_FORMAT, p2i(mem_serialize_page));
-  }
-
   // initialize suspend/resume support - must do this before signal_sets_init()
   if (SR_initialize() != 0) {
     perror("SR_initialize failed");
--- a/src/hotspot/os/linux/os_linux.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/os/linux/os_linux.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -4805,20 +4805,6 @@
 
   Linux::fast_thread_clock_init();
 
-  // Allocate a single page and mark it as readable for safepoint polling
-  address polling_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-  guarantee(polling_page != MAP_FAILED, "os::init_2: failed to allocate polling page");
-
-  os::set_polling_page(polling_page);
-  log_info(os)("SafePoint Polling address: " INTPTR_FORMAT, p2i(polling_page));
-
-  if (!UseMembar) {
-    address mem_serialize_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-    guarantee(mem_serialize_page != MAP_FAILED, "mmap Failed for memory serialize page");
-    os::set_memory_serialize_page(mem_serialize_page);
-    log_info(os)("Memory Serialize Page address: " INTPTR_FORMAT, p2i(mem_serialize_page));
-  }
-
   // initialize suspend/resume support - must do this before signal_sets_init()
   if (SR_initialize() != 0) {
     perror("SR_initialize failed");
--- a/src/hotspot/os/solaris/os_solaris.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/os/solaris/os_solaris.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -2190,10 +2190,6 @@
 
 static int page_size = -1;
 
-// The mmap MAP_ALIGN flag is supported on Solaris 9 and later.  init_2() will
-// clear this var if support is not available.
-static bool has_map_align = true;
-
 int os::vm_page_size() {
   assert(page_size != -1, "must call os::init");
   return page_size;
@@ -2560,7 +2556,7 @@
 
   if (fixed) {
     flags |= MAP_FIXED;
-  } else if (has_map_align && (alignment_hint > (size_t) vm_page_size())) {
+  } else if (alignment_hint > (size_t) vm_page_size()) {
     flags |= MAP_ALIGN;
     addr = (char*) alignment_hint;
   }
@@ -4222,28 +4218,6 @@
   // try to enable extended file IO ASAP, see 6431278
   os::Solaris::try_enable_extended_io();
 
-  // Allocate a single page and mark it as readable for safepoint polling.  Also
-  // use this first mmap call to check support for MAP_ALIGN.
-  address polling_page = (address)Solaris::mmap_chunk((char*)page_size,
-                                                      page_size,
-                                                      MAP_PRIVATE | MAP_ALIGN,
-                                                      PROT_READ);
-  if (polling_page == NULL) {
-    has_map_align = false;
-    polling_page = (address)Solaris::mmap_chunk(NULL, page_size, MAP_PRIVATE,
-                                                PROT_READ);
-  }
-
-  os::set_polling_page(polling_page);
-  log_info(os)("SafePoint Polling address: " INTPTR_FORMAT, p2i(polling_page));
-
-  if (!UseMembar) {
-    address mem_serialize_page = (address)Solaris::mmap_chunk(NULL, page_size, MAP_PRIVATE, PROT_READ | PROT_WRITE);
-    guarantee(mem_serialize_page != NULL, "mmap Failed for memory serialize page");
-    os::set_memory_serialize_page(mem_serialize_page);
-    log_info(os)("Memory Serialize Page address: " INTPTR_FORMAT, p2i(mem_serialize_page));
-  }
-
   // Check and sets minimum stack sizes against command line options
   if (Posix::set_minimum_stack_sizes() == JNI_ERR) {
     return JNI_ERR;
--- a/src/hotspot/os/windows/os_windows.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/os/windows/os_windows.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -3911,27 +3911,6 @@
 
 // this is called _after_ the global arguments have been parsed
 jint os::init_2(void) {
-  // Allocate a single page and mark it as readable for safepoint polling
-  address polling_page = (address)VirtualAlloc(NULL, os::vm_page_size(), MEM_RESERVE, PAGE_READONLY);
-  guarantee(polling_page != NULL, "Reserve Failed for polling page");
-
-  address return_page  = (address)VirtualAlloc(polling_page, os::vm_page_size(), MEM_COMMIT, PAGE_READONLY);
-  guarantee(return_page != NULL, "Commit Failed for polling page");
-
-  os::set_polling_page(polling_page);
-  log_info(os)("SafePoint Polling address: " INTPTR_FORMAT, p2i(polling_page));
-
-  if (!UseMembar) {
-    address mem_serialize_page = (address)VirtualAlloc(NULL, os::vm_page_size(), MEM_RESERVE, PAGE_READWRITE);
-    guarantee(mem_serialize_page != NULL, "Reserve Failed for memory serialize page");
-
-    return_page  = (address)VirtualAlloc(mem_serialize_page, os::vm_page_size(), MEM_COMMIT, PAGE_READWRITE);
-    guarantee(return_page != NULL, "Commit Failed for memory serialize page");
-
-    os::set_memory_serialize_page(mem_serialize_page);
-    log_info(os)("Memory Serialize Page address: " INTPTR_FORMAT, p2i(mem_serialize_page));
-  }
-
   // Setup Windows Exceptions
 
   // for debugging float code generation bugs
--- a/src/hotspot/os_cpu/linux_sparc/thread_linux_sparc.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/os_cpu/linux_sparc/thread_linux_sparc.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -68,7 +68,7 @@
   address o_reg_temps(int i) { return (address)&_o_reg_temps[i]; }
 #endif
 
-  static int saved_exception_npc_offset_in_bytes() { return offset_of(JavaThread,_saved_exception_npc); }
+  static ByteSize saved_exception_npc_offset() { return byte_offset_of(JavaThread,_saved_exception_npc); }
 
   address  saved_exception_npc()             { return _saved_exception_npc; }
   void set_saved_exception_npc(address a)    { _saved_exception_npc = a; }
--- a/src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -447,7 +447,7 @@
       // a fault inside compiled code, the interpreter, or a stub
 
       // Support Safepoint Polling
-      if ( sig == SIGSEGV && (address)info->si_addr == os::get_polling_page() ) {
+      if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) {
         stub = SharedRuntime::get_poll_stub(pc);
       }
 
--- a/src/hotspot/os_cpu/solaris_sparc/thread_solaris_sparc.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/os_cpu/solaris_sparc/thread_solaris_sparc.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -63,7 +63,7 @@
 
   static int o_reg_temps_offset_in_bytes() { return offset_of(JavaThread, _o_reg_temps); }
 
-  static int saved_exception_npc_offset_in_bytes() { return offset_of(JavaThread,_saved_exception_npc); }
+  static ByteSize saved_exception_npc_offset() { return byte_offset_of(JavaThread,_saved_exception_npc); }
 
   address  saved_exception_npc()             { return _saved_exception_npc; }
   void set_saved_exception_npc(address a)    { _saved_exception_npc = a; }
--- a/src/hotspot/share/interpreter/templateInterpreter.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/interpreter/templateInterpreter.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -162,6 +162,7 @@
   static int        distance_from_dispatch_table(TosState state){ return _active_table.distance_from(state); }
   static address*   normal_table(TosState state)                { return _normal_table.table_for(state); }
   static address*   normal_table()                              { return _normal_table.table_for(); }
+  static address*   safept_table(TosState state)                { return _safept_table.table_for(state); }
 
   // Support for invokes
   static address*   invoke_return_entry_table()                 { return _invoke_return_entry; }
--- a/src/hotspot/share/jvmci/jvmciCodeInstaller.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/jvmci/jvmciCodeInstaller.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -37,6 +37,7 @@
 #include "oops/oop.inline.hpp"
 #include "oops/objArrayOop.inline.hpp"
 #include "runtime/javaCalls.hpp"
+#include "runtime/safepointMechanism.inline.hpp"
 #include "utilities/align.hpp"
 
 // frequently used constants
@@ -854,9 +855,10 @@
     }
     last_pc_offset = pc_offset;
 
-    if (SafepointSynchronize::do_call_back()) {
+    JavaThread* thread = JavaThread::current();
+    if (SafepointMechanism::poll(thread)) {
       // this is a hacky way to force a safepoint check but nothing else was jumping out at me.
-      ThreadToNativeFromVM ttnfv(JavaThread::current());
+      ThreadToNativeFromVM ttnfv(thread);
     }
   }
 
--- a/src/hotspot/share/logging/logTag.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/logging/logTag.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -67,6 +67,7 @@
   LOG_TAG(fingerprint) \
   LOG_TAG(freelist) \
   LOG_TAG(gc) \
+  LOG_TAG(handshake) \
   LOG_TAG(hashtables) \
   LOG_TAG(heap) \
   LOG_TAG(humongous) \
--- a/src/hotspot/share/opto/parse1.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/opto/parse1.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -2286,7 +2286,14 @@
 
   // Create a node for the polling address
   if( add_poll_param ) {
-    Node *polladr = ConPNode::make((address)os::get_polling_page());
+    Node *polladr;
+    if (SafepointMechanism::uses_thread_local_poll()) {
+      Node *thread = _gvn.transform(new ThreadLocalNode());
+      Node *polling_page_load_addr = _gvn.transform(basic_plus_adr(top(), thread, in_bytes(Thread::polling_page_offset())));
+      polladr = make_load(control(), polling_page_load_addr, TypeRawPtr::BOTTOM, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered);
+    } else {
+      polladr = ConPNode::make((address)os::get_polling_page());
+    }
     sfpnt->init_req(TypeFunc::Parms+0, _gvn.transform(polladr));
   }
 
--- a/src/hotspot/share/prims/whitebox.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/prims/whitebox.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -49,6 +49,7 @@
 #include "runtime/arguments.hpp"
 #include "runtime/compilationPolicy.hpp"
 #include "runtime/deoptimization.hpp"
+#include "runtime/handshake.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/os.hpp"
@@ -1727,6 +1728,40 @@
 #endif
 WB_END
 
+WB_ENTRY(jint, WB_HandshakeWalkStack(JNIEnv* env, jobject wb, jobject thread_handle, jboolean all_threads))
+  class TraceSelfClosure : public ThreadClosure {
+    jint _num_threads_completed;
+
+    void do_thread(Thread* th) {
+      assert(th->is_Java_thread(), "sanity");
+      JavaThread* jt = (JavaThread*)th;
+      ResourceMark rm;
+
+      jt->print_on(tty);
+      jt->print_stack_on(tty);
+      tty->cr();
+      Atomic::inc(&_num_threads_completed);
+    }
+
+  public:
+    TraceSelfClosure() : _num_threads_completed(0) {}
+
+    jint num_threads_completed() const { return _num_threads_completed; }
+  };
+  TraceSelfClosure tsc;
+
+  if (all_threads) {
+    Handshake::execute(&tsc);
+  } else {
+    oop thread_oop = JNIHandles::resolve(thread_handle);
+    if (thread_oop != NULL) {
+      JavaThread* target = java_lang_Thread::thread(thread_oop);
+      Handshake::execute(&tsc, target);
+    }
+  }
+  return tsc.num_threads_completed();
+WB_END
+
 //Some convenience methods to deal with objects from java
 int WhiteBox::offset_for_field(const char* field_name, oop object,
     Symbol* signature_symbol) {
@@ -2038,6 +2073,7 @@
   {CC"areOpenArchiveHeapObjectsMapped",   CC"()Z",    (void*)&WB_AreOpenArchiveHeapObjectsMapped},
   {CC"isCDSIncludedInVmBuild",            CC"()Z",    (void*)&WB_IsCDSIncludedInVmBuild },
   {CC"clearInlineCaches0",  CC"(Z)V",                 (void*)&WB_ClearInlineCaches },
+  {CC"handshakeWalkStack", CC"(Ljava/lang/Thread;Z)I", (void*)&WB_HandshakeWalkStack },
   {CC"addCompilerDirective",    CC"(Ljava/lang/String;)I",
                                                       (void*)&WB_AddCompilerDirective },
   {CC"removeCompilerDirective",   CC"(I)V",             (void*)&WB_RemoveCompilerDirective },
--- a/src/hotspot/share/runtime/arguments.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/arguments.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -50,6 +50,7 @@
 #include "runtime/globals_extension.hpp"
 #include "runtime/java.hpp"
 #include "runtime/os.hpp"
+#include "runtime/safepointMechanism.hpp"
 #include "runtime/vm_version.hpp"
 #include "services/management.hpp"
 #include "services/memTracker.hpp"
@@ -4620,6 +4621,32 @@
   }
 #endif
 
+  bool aot_enabled = UseAOT && AOTLibrary != NULL;
+  bool jvmci_enabled = NOT_JVMCI(false) JVMCI_ONLY(EnableJVMCI || UseJVMCICompiler);
+  bool handshakes_supported = SafepointMechanism::supports_thread_local_poll() && !aot_enabled && !jvmci_enabled && ThreadLocalHandshakes;
+  // ThreadLocalHandshakesConstraintFunc handles the constraints.
+  // Here we try to figure out if a mutually exclusive option has been set that conflicts with a default.
+  if (handshakes_supported) {
+    FLAG_SET_DEFAULT(UseAOT, false); // Clear the AOT flag to make sure it doesn't try to initialize.
+  } else {
+    if (FLAG_IS_DEFAULT(ThreadLocalHandshakes) && ThreadLocalHandshakes) {
+      if (aot_enabled) {
+        // If user enabled AOT but ThreadLocalHandshakes is at default set it to false.
+        log_debug(ergo)("Disabling ThreadLocalHandshakes for UseAOT.");
+        FLAG_SET_DEFAULT(ThreadLocalHandshakes, false);
+      } else if (jvmci_enabled){
+        // If user enabled JVMCI but ThreadLocalHandshakes is at default set it to false.
+        log_debug(ergo)("Disabling ThreadLocalHandshakes for EnableJVMCI/UseJVMCICompiler.");
+        FLAG_SET_DEFAULT(ThreadLocalHandshakes, false);
+      }
+    }
+  }
+  if (FLAG_IS_DEFAULT(ThreadLocalHandshakes) || !SafepointMechanism::supports_thread_local_poll()) {
+    log_debug(ergo)("ThreadLocalHandshakes %s", ThreadLocalHandshakes ? "enabled." : "disabled.");
+  } else {
+    log_info(ergo)("ThreadLocalHandshakes %s", ThreadLocalHandshakes ? "enabled." : "disabled.");
+  }
+
   return JNI_OK;
 }
 
--- a/src/hotspot/share/runtime/commandLineFlagConstraintsRuntime.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/commandLineFlagConstraintsRuntime.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
 #include "runtime/commandLineFlagConstraintsRuntime.hpp"
 #include "runtime/commandLineFlagRangeList.hpp"
 #include "runtime/globals.hpp"
+#include "runtime/safepointMechanism.hpp"
 #include "runtime/task.hpp"
 #include "utilities/defaultStream.hpp"
 
@@ -130,3 +131,17 @@
     return Flag::SUCCESS;
   }
 }
+
+Flag::Error ThreadLocalHandshakesConstraintFunc(bool value, bool verbose) {
+  if (value) {
+    if (!SafepointMechanism::supports_thread_local_poll()) {
+      CommandLineError::print(verbose, "ThreadLocalHandshakes not yet supported on this platform\n");
+      return Flag::VIOLATES_CONSTRAINT;
+    }
+    if (UseAOT JVMCI_ONLY(|| EnableJVMCI || UseJVMCICompiler)) {
+      CommandLineError::print(verbose, "ThreadLocalHandshakes not yet supported in combination with AOT or JVMCI\n");
+      return Flag::VIOLATES_CONSTRAINT;
+    }
+  }
+  return Flag::SUCCESS;
+}
--- a/src/hotspot/share/runtime/commandLineFlagConstraintsRuntime.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/commandLineFlagConstraintsRuntime.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -45,4 +45,7 @@
 
 Flag::Error PerfDataSamplingIntervalFunc(intx value, bool verbose);
 
+Flag::Error ThreadLocalHandshakesConstraintFunc(bool value, bool verbose);
+
+
 #endif /* SHARE_VM_RUNTIME_COMMANDLINEFLAGCONSTRAINTSRUNTIME_HPP */
--- a/src/hotspot/share/runtime/globals.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/globals.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -598,6 +598,13 @@
   develop(bool, CleanChunkPoolAsync, true,                                  \
           "Clean the chunk pool asynchronously")                            \
                                                                             \
+  product_pd(bool, ThreadLocalHandshakes,                                   \
+          "Use thread-local polls instead of global poll for safepoints.")  \
+          constraint(ThreadLocalHandshakesConstraintFunc,AfterErgo)         \
+                                                                            \
+  diagnostic(uint, HandshakeTimeout, 0,                                     \
+          "If nonzero set a timeout in milliseconds for handshakes")        \
+                                                                            \
   experimental(bool, AlwaysSafeConstructors, false,                         \
           "Force safe construction, as if all fields are final.")           \
                                                                             \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/runtime/handshake.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "logging/log.hpp"
+#include "logging/logStream.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/handshake.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/osThread.hpp"
+#include "runtime/semaphore.hpp"
+#include "runtime/task.hpp"
+#include "runtime/timerTrace.hpp"
+#include "runtime/thread.hpp"
+#include "runtime/vmThread.hpp"
+#include "utilities/formatBuffer.hpp"
+#include "utilities/preserveException.hpp"
+
+#define ALL_JAVA_THREADS(X) for (JavaThread* X = Threads::first(); X; X = X->next())
+
+class HandshakeOperation: public StackObj {
+public:
+  virtual void do_handshake(JavaThread* thread) = 0;
+  virtual void cancel_handshake(JavaThread* thread) = 0;
+};
+
+class HandshakeThreadsOperation: public HandshakeOperation {
+  Semaphore _done;            // Starts at 0; signaled once by each do_handshake()/cancel_handshake() call.
+  ThreadClosure* _thread_cl;  // Closure that do_handshake() invokes on the target thread.
+
+public:
+  HandshakeThreadsOperation(ThreadClosure* cl) : _done(0), _thread_cl(cl) {}
+  void do_handshake(JavaThread* thread);
+  void cancel_handshake(JavaThread* thread) { _done.signal(); };  // Closure is not run; only completion is reported.
+
+  bool thread_has_completed() { return _done.trywait(); }  // Consumes one pending completion signal, if any.
+};
+
+class VM_Handshake: public VM_Operation {
+  HandshakeThreadsOperation* const _op;
+  const jlong _handshake_timeout;  // HandshakeTimeout (ms) converted to elapsed-counter units; 0 means no timeout.
+ public:
+  bool evaluate_at_safepoint() const { return false; }
+
+  bool evaluate_concurrently() const { return false; }
+
+ protected:
+
+  VM_Handshake(HandshakeThreadsOperation* op) :
+      _op(op),
+      _handshake_timeout(TimeHelper::millis_to_counter(HandshakeTimeout)) {}
+
+  void set_handshake(JavaThread* target) {
+    target->set_handshake_operation(_op);
+  }
+
+  // This method returns true both for threads that have completed their
+  // operation and for threads that have canceled it.
+  // A cancellation can happen if the thread is exiting.
+  bool poll_for_completed_thread() { return _op->thread_has_completed(); }
+
+  bool handshake_has_timed_out(jlong start_time);
+  static void handle_timeout();
+};
+
+bool VM_Handshake::handshake_has_timed_out(jlong start_time) {
+  // Check if handshake operation has timed out
+  if (_handshake_timeout > 0) {
+    return os::elapsed_counter() >= (start_time + _handshake_timeout);
+  }
+  return false;
+}
+
+void VM_Handshake::handle_timeout() {
+  LogStreamHandle(Warning, handshake) log_stream;
+  MutexLockerEx ml(Threads_lock, Mutex::_no_safepoint_check_flag);
+  ALL_JAVA_THREADS(thr) {
+    if (thr->has_handshake()) {
+      log_stream.print("Thread " PTR_FORMAT " has not cleared its handshake op", p2i(thr));
+      thr->print_thread_state_on(&log_stream);
+    }
+  }
+  log_stream.flush();
+  fatal("Handshake operation timed out");
+}
+
+
+class VM_HandshakeOneThread: public VM_Handshake {
+  JavaThread* _target;
+  bool _thread_alive;
+ public:
+  VM_HandshakeOneThread(HandshakeThreadsOperation* op, JavaThread* target) :
+    VM_Handshake(op), _target(target), _thread_alive(false) {}
+
+  void doit() {
+    TraceTime timer("Performing single-target operation (vmoperation doit)", TRACETIME_LOG(Info, handshake));
+
+    {
+      MutexLockerEx ml(Threads_lock, Mutex::_no_safepoint_check_flag);
+      if (Threads::includes(_target)) {
+        set_handshake(_target);
+        _thread_alive = true;
+      }
+    }
+
+    if (!_thread_alive) {
+      return;
+    }
+
+    if (!UseMembar) {
+      os::serialize_thread_states();
+    }
+
+    log_trace(handshake)("Thread signaled, begin processing by VMThtread");
+    jlong start_time = os::elapsed_counter();
+    do {
+      if (handshake_has_timed_out(start_time)) {
+        handle_timeout();
+      }
+
+      MutexLockerEx ml(Threads_lock, Mutex::_no_safepoint_check_flag);
+      _target->handshake_process_by_vmthread();
+
+    } while (!poll_for_completed_thread());
+  }
+
+  VMOp_Type type() const { return VMOp_HandshakeOneThread; }
+
+  bool thread_alive() const { return _thread_alive; }
+};
+
+class VM_HandshakeAllThreads: public VM_Handshake {
+ public:
+  VM_HandshakeAllThreads(HandshakeThreadsOperation* op) : VM_Handshake(op) {}
+
+  void doit() {
+    TraceTime timer("Performing operation (vmoperation doit)", TRACETIME_LOG(Info, handshake));
+
+    int number_of_threads_issued = -1;
+    int number_of_threads_completed = 0;
+    {
+      MutexLockerEx ml(Threads_lock, Mutex::_no_safepoint_check_flag);
+      number_of_threads_issued = Threads::number_of_threads();
+
+      ALL_JAVA_THREADS(thr) {
+        set_handshake(thr);
+      }
+    }
+
+    if (!UseMembar) {
+      os::serialize_thread_states();
+    }
+
+    log_debug(handshake)("Threads signaled, begin processing blocked threads by VMThtread");
+    const jlong start_time = os::elapsed_counter();
+    do {
+      // Check if handshake operation has timed out
+      if (handshake_has_timed_out(start_time)) {
+        handle_timeout();
+      }
+
+      // Have VM thread perform the handshake operation for blocked threads.
+      // Observing a blocked state may of course be transient but the processing is guarded
+      // by semaphores and we optimistically begin by working on the blocked threads
+      {
+          MutexLockerEx ml(Threads_lock, Mutex::_no_safepoint_check_flag);
+          ALL_JAVA_THREADS(thr) {
+            thr->handshake_process_by_vmthread();
+          }
+      }
+
+      while (poll_for_completed_thread()) {
+        number_of_threads_completed++;
+      }
+
+    } while (number_of_threads_issued != number_of_threads_completed);
+  }
+
+  VMOp_Type type() const { return VMOp_HandshakeAllThreads; }
+};
+
+class VM_HandshakeFallbackOperation : public VM_Operation {
+  ThreadClosure* _thread_cl;
+  Thread* _target_thread;
+  bool _all_threads;
+  bool _thread_alive;
+public:
+  VM_HandshakeFallbackOperation(ThreadClosure* cl) :
+      _thread_cl(cl), _target_thread(NULL), _all_threads(true), _thread_alive(true) {}
+  VM_HandshakeFallbackOperation(ThreadClosure* cl, Thread* target) :
+      _thread_cl(cl), _target_thread(target), _all_threads(false), _thread_alive(false) {}
+
+  void doit() {
+    ALL_JAVA_THREADS(t) {
+      if (_all_threads || t == _target_thread) {
+        if (t == _target_thread) {
+          _thread_alive = true;
+        }
+        _thread_cl->do_thread(t);
+      }
+    }
+  }
+
+  VMOp_Type type() const { return VMOp_HandshakeFallback; }
+  bool thread_alive() const { return _thread_alive; }
+};
+
+#undef ALL_JAVA_THREADS
+
+void HandshakeThreadsOperation::do_handshake(JavaThread* thread) {
+  ResourceMark rm;
+  FormatBufferResource message("Operation for thread " PTR_FORMAT ", is_vm_thread: %s",
+                               p2i(thread), BOOL_TO_STR(Thread::current()->is_VM_thread()));
+  TraceTime timer(message, TRACETIME_LOG(Debug, handshake, task));
+  _thread_cl->do_thread(thread);
+
+  // Use the semaphore to inform the VM thread that we have completed the operation
+  _done.signal();
+}
+
+void Handshake::execute(ThreadClosure* thread_cl) {
+  if (ThreadLocalHandshakes) {
+    HandshakeThreadsOperation cto(thread_cl);
+    VM_HandshakeAllThreads handshake(&cto);
+    VMThread::execute(&handshake);
+  } else {
+    VM_HandshakeFallbackOperation op(thread_cl);
+    VMThread::execute(&op);
+  }
+}
+
+bool Handshake::execute(ThreadClosure* thread_cl, JavaThread* target) {
+  if (ThreadLocalHandshakes) {
+    HandshakeThreadsOperation cto(thread_cl);
+    VM_HandshakeOneThread handshake(&cto, target);
+    VMThread::execute(&handshake);
+    return handshake.thread_alive();
+  } else {
+    VM_HandshakeFallbackOperation op(thread_cl, target);
+    VMThread::execute(&op);
+    return op.thread_alive();
+  }
+}
+
+HandshakeState::HandshakeState() : _operation(NULL), _semaphore(1), _vmthread_holds_semaphore(false), _thread_in_process_handshake(false) {}
+
+void HandshakeState::set_operation(JavaThread* target, HandshakeOperation* op) {
+  _operation = op;
+  SafepointMechanism::arm_local_poll(target);
+}
+
+void HandshakeState::clear_handshake(JavaThread* target) {
+  _operation = NULL;
+  SafepointMechanism::disarm_local_poll(target);
+}
+
+void HandshakeState::process_self_inner(JavaThread* thread) {
+  assert(Thread::current() == thread, "should call from thread");
+  CautiouslyPreserveExceptionMark pem(thread);
+  ThreadInVMForHandshake tivm(thread);
+  if (!_semaphore.trywait()) {
+    ThreadBlockInVM tbivm(thread);
+    _semaphore.wait();
+  }
+  if (has_operation()) {
+    HandshakeOperation* op = _operation;
+    clear_handshake(thread);
+    if (op != NULL) {
+      op->do_handshake(thread);
+    }
+  }
+  _semaphore.signal();
+}
+
+void HandshakeState::cancel_inner(JavaThread* thread) {
+  assert(Thread::current() == thread, "should call from thread");
+  assert(thread->thread_state() == _thread_in_vm, "must be in vm state");
+#ifdef DEBUG
+  {
+    MutexLockerEx ml(Threads_lock,  Mutex::_no_safepoint_check_flag);
+    assert(!Threads::includes(thread), "java thread must not be on threads list");
+  }
+#endif
+  HandshakeOperation* op = _operation;
+  clear_handshake(thread);
+  if (op != NULL) {
+    op->cancel_handshake(thread);
+  }
+}
+
+bool HandshakeState::vmthread_can_process_handshake(JavaThread* target) {
+  return SafepointSynchronize::safepoint_safe(target, target->thread_state());
+}
+
+bool HandshakeState::claim_handshake_for_vmthread() {
+  if (_semaphore.trywait()) {
+    if (has_operation()) {
+      _vmthread_holds_semaphore = true;  // Keep the semaphore; process_by_vmthread() releases it when done.
+    } else {
+      _semaphore.signal();  // No operation pending: hand the semaphore straight back.
+    }
+  }
+  return _vmthread_holds_semaphore;  // Also true if an earlier call claimed it and it has not been released yet.
+}
+
+void HandshakeState::process_by_vmthread(JavaThread* target) {
+  assert(Thread::current()->is_VM_thread(), "should call from vm thread");
+
+  if (!has_operation()) {
+    // JT has already cleared its handshake
+    return;
+  }
+
+  if (!vmthread_can_process_handshake(target)) {
+    // JT is observed in an unsafe state, it must notice the handshake itself
+    return;
+  }
+
+  // If we own the semaphore at this point and, while owning the semaphore,
+  // can observe a safe state, the thread cannot possibly continue without
+  // getting caught by the semaphore.
+  if (claim_handshake_for_vmthread() && vmthread_can_process_handshake(target)) {
+    guarantee(!_semaphore.trywait(), "we should already own the semaphore");
+
+    _operation->do_handshake(target);
+    clear_handshake(target);
+    _vmthread_holds_semaphore = false;
+    // Release the thread
+    _semaphore.signal();
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/runtime/handshake.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_HANDSHAKE_HPP
+#define SHARE_VM_RUNTIME_HANDSHAKE_HPP
+
+#include "memory/allocation.hpp"
+#include "runtime/semaphore.hpp"
+
+class ThreadClosure;
+class JavaThread;
+
+// A handshake operation is a callback that is executed for each JavaThread
+// while that thread is in a safepoint safe state. The callback is executed
+// either by the thread itself or by the VM thread while keeping the thread
+// in a blocked state. A handshake can be performed with a single
+// JavaThread as well.
+class Handshake : public AllStatic {
+ public:
+  // Execution of handshake operation
+  static void execute(ThreadClosure* thread_cl);
+  static bool execute(ThreadClosure* thread_cl, JavaThread* target);
+};
+
+class HandshakeOperation;
+
+// The HandshakeState keeps track of an ongoing handshake for one JavaThread.
+// VM thread and JavaThread are serialized with the semaphore making sure
+// the operation is only done by either VM thread on behalf of the JavaThread
+// or the JavaThread itself.
+class HandshakeState VALUE_OBJ_CLASS_SPEC {
+  HandshakeOperation* volatile _operation;  // Pending operation, or NULL when none is installed.
+
+  Semaphore _semaphore;                     // Initialized to 1; held by whichever side is processing.
+  bool _vmthread_holds_semaphore;           // Set while the VM thread has claimed _semaphore.
+  bool _thread_in_process_handshake;        // Guards cancel()/process_by_self() against nested entry.
+
+  bool claim_handshake_for_vmthread();
+  bool vmthread_can_process_handshake(JavaThread* target);
+
+  void clear_handshake(JavaThread* thread);
+  void cancel_inner(JavaThread* thread);
+
+  void process_self_inner(JavaThread* thread);
+public:
+  HandshakeState();
+
+  void set_operation(JavaThread* thread, HandshakeOperation* op);
+
+  bool has_operation() const {
+    return _operation != NULL;
+  }
+
+  void cancel(JavaThread* thread) {
+    if (!_thread_in_process_handshake) {
+      FlagSetting fs(_thread_in_process_handshake, true);
+      cancel_inner(thread);
+    }
+  }
+
+  void process_by_self(JavaThread* thread) {
+    if (!_thread_in_process_handshake) {
+      FlagSetting fs(_thread_in_process_handshake, true);
+      process_self_inner(thread);
+    }
+  }
+  void process_by_vmthread(JavaThread* target);
+};
+
+#endif // SHARE_VM_RUNTIME_HANDSHAKE_HPP
--- a/src/hotspot/share/runtime/interfaceSupport.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/interfaceSupport.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -30,7 +30,7 @@
 #include "runtime/mutexLocker.hpp"
 #include "runtime/orderAccess.hpp"
 #include "runtime/os.hpp"
-#include "runtime/safepoint.hpp"
+#include "runtime/safepointMechanism.inline.hpp"
 #include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
 #include "utilities/globalDefinitions.hpp"
@@ -142,9 +142,7 @@
 
     InterfaceSupport::serialize_thread_state(thread);
 
-    if (SafepointSynchronize::do_call_back()) {
-      SafepointSynchronize::block(thread);
-    }
+    SafepointMechanism::block_if_requested(thread);
     thread->set_thread_state(to);
 
     CHECK_UNHANDLED_OOPS_ONLY(thread->clear_unhandled_oops();)
@@ -164,9 +162,7 @@
 
     InterfaceSupport::serialize_thread_state_with_handler(thread);
 
-    if (SafepointSynchronize::do_call_back()) {
-      SafepointSynchronize::block(thread);
-    }
+    SafepointMechanism::block_if_requested(thread);
     thread->set_thread_state(to);
 
     CHECK_UNHANDLED_OOPS_ONLY(thread->clear_unhandled_oops();)
@@ -191,7 +187,7 @@
     // We never install asynchronous exceptions when coming (back) in
     // to the runtime from native code because the runtime is not set
     // up to handle exceptions floating around at arbitrary points.
-    if (SafepointSynchronize::do_call_back() || thread->is_suspend_after_native()) {
+    if (SafepointMechanism::poll(thread) || thread->is_suspend_after_native()) {
       JavaThread::check_safepoint_and_suspend_for_native_trans(thread);
 
       // Clear unhandled oops anywhere where we could block, even if we don't.
@@ -207,6 +203,38 @@
    void trans_and_fence(JavaThreadState from, JavaThreadState to) { transition_and_fence(_thread, from, to); }
 };
 
+class ThreadInVMForHandshake : public ThreadStateTransition {
+  const JavaThreadState _original_state;
+
+  void transition_back() {
+    // This can be invoked from transition states and must return to the original state properly
+    assert(_thread->thread_state() == _thread_in_vm, "should only call when leaving VM after handshake");
+    _thread->set_thread_state(_thread_in_vm_trans);
+
+    InterfaceSupport::serialize_thread_state(_thread);
+
+    SafepointMechanism::block_if_requested(_thread);
+
+    _thread->set_thread_state(_original_state);
+  }
+
+ public:
+
+  ThreadInVMForHandshake(JavaThread* thread) : ThreadStateTransition(thread),
+      _original_state(thread->thread_state()) {
+
+    if (thread->has_last_Java_frame()) {
+      thread->frame_anchor()->make_walkable(thread);
+    }
+
+    thread->set_thread_state(_thread_in_vm);
+  }
+
+  ~ThreadInVMForHandshake() {
+    transition_back();
+  }
+
+};
 
 class ThreadInVMfromJava : public ThreadStateTransition {
  public:
--- a/src/hotspot/share/runtime/mutex.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/mutex.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -28,6 +28,7 @@
 #include "runtime/mutex.hpp"
 #include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
+#include "runtime/safepointMechanism.inline.hpp"
 #include "runtime/thread.inline.hpp"
 #include "utilities/events.hpp"
 #include "utilities/macros.hpp"
@@ -394,7 +395,7 @@
       jint rv = Self->rng[0];
       for (int k = Delay; --k >= 0;) {
         rv = MarsagliaXORV(rv);
-        if ((flgs & 4) == 0 && SafepointSynchronize::do_call_back()) return 0;
+        if ((flgs & 4) == 0 && SafepointMechanism::poll(Self)) return 0;
       }
       Self->rng[0] = rv;
     } else {
--- a/src/hotspot/share/runtime/objectMonitor.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/objectMonitor.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -35,6 +35,7 @@
 #include "runtime/objectMonitor.inline.hpp"
 #include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
+#include "runtime/safepointMechanism.inline.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/thread.inline.hpp"
 #include "services/threadService.hpp"
@@ -1282,7 +1283,7 @@
   OrderAccess::release_store(&_owner, (void*)NULL);
   OrderAccess::fence();                               // ST _owner vs LD in unpark()
 
-  if (SafepointSynchronize::do_call_back()) {
+  if (SafepointMechanism::poll(Self)) {
     TEVENT(unpark before SAFEPOINT);
   }
 
@@ -1936,7 +1937,7 @@
     // This is in keeping with the "no loitering in runtime" rule.
     // We periodically check to see if there's a safepoint pending.
     if ((ctr & 0xFF) == 0) {
-      if (SafepointSynchronize::do_call_back()) {
+      if (SafepointMechanism::poll(Self)) {
         TEVENT(Spin: safepoint);
         goto Abort;           // abrupt spin egress
       }
--- a/src/hotspot/share/runtime/safepoint.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/safepoint.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -52,6 +52,7 @@
 #include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/safepoint.hpp"
+#include "runtime/safepointMechanism.inline.hpp"
 #include "runtime/signature.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "runtime/stubRoutines.hpp"
@@ -169,21 +170,32 @@
     int initial_running = 0;
 
     _state            = _synchronizing;
-    OrderAccess::fence();
+
+    if (SafepointMechanism::uses_thread_local_poll()) {
+      // Arming the per thread poll while having _state != _not_synchronized means safepointing
+      log_trace(safepoint)("Setting thread local yield flag for threads");
+      for (JavaThread *cur = Threads::first(); cur != NULL; cur = cur->next()) {
+        // Make sure the threads start polling, it is time to yield.
+        SafepointMechanism::arm_local_poll(cur); // release store, global state -> local state
+      }
+    }
+    OrderAccess::fence(); // storestore|storeload, global state -> local state
 
     // Flush all thread states to memory
     if (!UseMembar) {
       os::serialize_thread_states();
     }
 
-    // Make interpreter safepoint aware
-    Interpreter::notice_safepoints();
+    if (SafepointMechanism::uses_global_page_poll()) {
+      // Make interpreter safepoint aware
+      Interpreter::notice_safepoints();
 
-    if (DeferPollingPageLoopCount < 0) {
-      // Make polling safepoint aware
-      guarantee (PageArmed == 0, "invariant") ;
-      PageArmed = 1 ;
-      os::make_polling_page_unreadable();
+      if (DeferPollingPageLoopCount < 0) {
+        // Make polling safepoint aware
+        guarantee (PageArmed == 0, "invariant") ;
+        PageArmed = 1 ;
+        os::make_polling_page_unreadable();
+      }
     }
 
     // Consider using active_processor_count() ... but that call is expensive.
@@ -293,7 +305,7 @@
         // 9. On windows consider using the return value from SwitchThreadTo()
         //    to drive subsequent spin/SwitchThreadTo()/Sleep(N) decisions.
 
-        if (int(iterations) == DeferPollingPageLoopCount) {
+        if (SafepointMechanism::uses_global_page_poll() && int(iterations) == DeferPollingPageLoopCount) {
           guarantee (PageArmed == 0, "invariant") ;
           PageArmed = 1 ;
           os::make_polling_page_unreadable();
@@ -444,7 +456,7 @@
   // A pending_exception cannot be installed during a safepoint.  The threads
   // may install an async exception after they come back from a safepoint into
   // pending_exception after they unblock.  But that should happen later.
-  for(JavaThread *cur = Threads::first(); cur; cur = cur->next()) {
+  for (JavaThread *cur = Threads::first(); cur; cur = cur->next()) {
     assert (!(cur->has_pending_exception() &&
               cur->safepoint_state()->is_at_poll_safepoint()),
             "safepoint installed a pending exception");
@@ -452,46 +464,60 @@
 #endif // ASSERT
 
   if (PageArmed) {
+    assert(SafepointMechanism::uses_global_page_poll(), "sanity");
     // Make polling safepoint aware
     os::make_polling_page_readable();
     PageArmed = 0 ;
   }
 
-  // Remove safepoint check from interpreter
-  Interpreter::ignore_safepoints();
+  if (SafepointMechanism::uses_global_page_poll()) {
+    // Remove safepoint check from interpreter
+    Interpreter::ignore_safepoints();
+  }
 
   {
     MutexLocker mu(Safepoint_lock);
 
     assert(_state == _synchronized, "must be synchronized before ending safepoint synchronization");
 
-    // Set to not synchronized, so the threads will not go into the signal_thread_blocked method
-    // when they get restarted.
-    _state = _not_synchronized;
-    OrderAccess::fence();
+    if (SafepointMechanism::uses_thread_local_poll()) {
+      _state = _not_synchronized;
+      OrderAccess::storestore(); // global state -> local state
+      for (JavaThread *current = Threads::first(); current; current = current->next()) {
+        ThreadSafepointState* cur_state = current->safepoint_state();
+        cur_state->restart(); // TSS _running
+        SafepointMechanism::disarm_local_poll(current); // release store, local state -> polling page
+      }
+      log_debug(safepoint)("Leaving safepoint region");
+    } else {
+      // Set to not synchronized, so the threads will not go into the signal_thread_blocked method
+      // when they get restarted.
+      _state = _not_synchronized;
+      OrderAccess::fence();
 
-    log_debug(safepoint)("Leaving safepoint region");
+      log_debug(safepoint)("Leaving safepoint region");
 
-    // Start suspended threads
-    for(JavaThread *current = Threads::first(); current; current = current->next()) {
-      // A problem occurring on Solaris is when attempting to restart threads
-      // the first #cpus - 1 go well, but then the VMThread is preempted when we get
-      // to the next one (since it has been running the longest).  We then have
-      // to wait for a cpu to become available before we can continue restarting
-      // threads.
-      // FIXME: This causes the performance of the VM to degrade when active and with
-      // large numbers of threads.  Apparently this is due to the synchronous nature
-      // of suspending threads.
-      //
-      // TODO-FIXME: the comments above are vestigial and no longer apply.
-      // Furthermore, using solaris' schedctl in this particular context confers no benefit
-      if (VMThreadHintNoPreempt) {
-        os::hint_no_preempt();
+      // Start suspended threads
+      for (JavaThread *current = Threads::first(); current; current = current->next()) {
+        // A problem occurring on Solaris is when attempting to restart threads
+        // the first #cpus - 1 go well, but then the VMThread is preempted when we get
+        // to the next one (since it has been running the longest).  We then have
+        // to wait for a cpu to become available before we can continue restarting
+        // threads.
+        // FIXME: This causes the performance of the VM to degrade when active and with
+        // large numbers of threads.  Apparently this is due to the synchronous nature
+        // of suspending threads.
+        //
+        // TODO-FIXME: the comments above are vestigial and no longer apply.
+        // Furthermore, using solaris' schedctl in this particular context confers no benefit
+        if (VMThreadHintNoPreempt) {
+          os::hint_no_preempt();
+        }
+        ThreadSafepointState* cur_state = current->safepoint_state();
+        assert(cur_state->type() != ThreadSafepointState::_running, "Thread not suspended at safepoint");
+        cur_state->restart();
+        assert(cur_state->is_running(), "safepoint state has not been reset");
       }
-      ThreadSafepointState* cur_state = current->safepoint_state();
-      assert(cur_state->type() != ThreadSafepointState::_running, "Thread not suspended at safepoint");
-      cur_state->restart();
-      assert(cur_state->is_running(), "safepoint state has not been reset");
     }
 
     RuntimeService::record_safepoint_end();
@@ -855,7 +881,9 @@
 void SafepointSynchronize::handle_polling_page_exception(JavaThread *thread) {
   assert(thread->is_Java_thread(), "polling reference encountered by VM thread");
   assert(thread->thread_state() == _thread_in_Java, "should come from Java code");
-  assert(SafepointSynchronize::is_synchronizing(), "polling encountered outside safepoint synchronization");
+  if (!ThreadLocalHandshakes) {
+    assert(SafepointSynchronize::is_synchronizing(), "polling encountered outside safepoint synchronization");
+  }
 
   if (ShowSafepointMsgs) {
     tty->print("handle_polling_page_exception: ");
@@ -887,7 +915,7 @@
     tty->print_cr("# SafepointSynchronize::begin: Threads which did not reach the safepoint:");
     ThreadSafepointState *cur_state;
     ResourceMark rm;
-    for(JavaThread *cur_thread = Threads::first(); cur_thread;
+    for (JavaThread *cur_thread = Threads::first(); cur_thread;
         cur_thread = cur_thread->next()) {
       cur_state = cur_thread->safepoint_state();
 
@@ -1053,13 +1081,14 @@
 
 // ---------------------------------------------------------------------------------------------------------------------
 
-// Block the thread at the safepoint poll or poll return.
+// Block the thread at poll or poll return for safepoint/handshake.
 void ThreadSafepointState::handle_polling_page_exception() {
 
   // Check state.  block() will set thread state to thread_in_vm which will
   // cause the safepoint state _type to become _call_back.
-  assert(type() == ThreadSafepointState::_running,
-         "polling page exception on thread not running state");
+  suspend_type t = type();
+  assert(!SafepointMechanism::uses_global_page_poll() || t == ThreadSafepointState::_running,
+         "polling page exception on thread not running state: %u", uint(t));
 
   // Step 1: Find the nmethod from the return address
   if (ShowSafepointMsgs && Verbose) {
@@ -1101,7 +1130,7 @@
     }
 
     // Block the thread
-    SafepointSynchronize::block(thread());
+    SafepointMechanism::block_if_requested(thread());
 
     // restore oop result, if any
     if (return_oop) {
@@ -1117,7 +1146,7 @@
     assert(real_return_addr == caller_fr.pc(), "must match");
 
     // Block the thread
-    SafepointSynchronize::block(thread());
+    SafepointMechanism::block_if_requested(thread());
     set_at_poll_safepoint(false);
 
     // If we have a pending async exception deoptimize the frame
@@ -1398,7 +1427,7 @@
     tty->print_cr("State: %s", (_state == _synchronizing) ? "synchronizing" :
                   "synchronized");
 
-    for(JavaThread *cur = Threads::first(); cur; cur = cur->next()) {
+    for (JavaThread *cur = Threads::first(); cur; cur = cur->next()) {
        cur->safepoint_state()->print();
     }
   }
--- a/src/hotspot/share/runtime/safepoint.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/safepoint.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -160,17 +160,22 @@
   inline static bool is_synchronizing()  { return _state == _synchronizing;  }
   inline static int safepoint_counter()  { return _safepoint_counter; }
 
-  inline static bool do_call_back() {
-    return (_state != _not_synchronized);
-  }
-
   inline static void increment_jni_active_count() {
     assert_locked_or_safepoint(Safepoint_lock);
     _current_jni_active_count++;
   }
 
+private:
+  inline static bool do_call_back() { // true whenever the global state is anything other than _not_synchronized
+    return (_state != _not_synchronized);
+  }
+
   // Called when a thread voluntarily blocks
   static void   block(JavaThread *thread);
+
+  friend class SafepointMechanism;
+
+public:
   static void   signal_thread_at_safepoint()              { _waiting_to_block--; }
 
   // Exception handling for page polling
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/runtime/safepointMechanism.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "logging/log.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/os.hpp"
+#include "runtime/safepointMechanism.inline.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+SafepointMechanism::PollingType SafepointMechanism::_polling_type = SafepointMechanism::_global_page_poll;
+void* SafepointMechanism::_poll_armed_value;
+void* SafepointMechanism::_poll_disarmed_value;
+
+void SafepointMechanism::default_initialize() { // sets up the polling page(s) for the mode selected by ThreadLocalHandshakes
+  if (ThreadLocalHandshakes) {
+    set_uses_thread_local_poll();
+    const size_t page_size = os::vm_page_size();
+    const size_t allocation_size = 2 * page_size;
+    char* polling_page = os::reserve_memory(allocation_size, NULL, page_size);
+    os::commit_memory_or_exit(polling_page, allocation_size, false, "Unable to commit Safepoint polling page");
+    // Two adjacent pages: the first ("bad") gets MEM_PROT_NONE, the second ("good") gets MEM_PROT_READ.
+    char* bad_page  = polling_page;
+    char* good_page = polling_page + page_size;
+
+    os::protect_memory(bad_page, page_size, os::MEM_PROT_NONE);
+    os::protect_memory(good_page, page_size, os::MEM_PROT_READ);
+
+    log_info(os)("SafePoint Polling address, bad (protected) page:" INTPTR_FORMAT ", good (unprotected) page:" INTPTR_FORMAT, p2i(bad_page), p2i(good_page));
+    os::set_polling_page((address)(bad_page));
+    // Armed value: address of the bad page with the poll bit set. Disarmed value: address of the good page.
+    intptr_t poll_page_val = reinterpret_cast<intptr_t>(bad_page);
+    _poll_armed_value = reinterpret_cast<void*>(poll_page_val | poll_bit());
+    _poll_disarmed_value = good_page;
+  } else {
+    const size_t page_size = os::vm_page_size(); // global page poll: a single polling page, protected MEM_PROT_READ
+    char* polling_page = os::reserve_memory(page_size, NULL, page_size);
+    os::commit_memory_or_exit(polling_page, page_size, false, "Unable to commit Safepoint polling page");
+    os::protect_memory(polling_page, page_size, os::MEM_PROT_READ);
+
+    log_info(os)("SafePoint Polling address: " INTPTR_FORMAT, p2i(polling_page));
+    os::set_polling_page((address)(polling_page));
+  }
+}
+
+void SafepointMechanism::initialize_header(JavaThread* thread) {
+  disarm_local_poll(thread); // the thread's polling word is set to the disarmed value
+}
+
+void SafepointMechanism::initialize_serialize_page() {
+  // The memory serialization page is only set up when UseMembar is off.
+  if (UseMembar) return;
+  const size_t bytes = os::vm_page_size();
+  char* const page = os::reserve_memory(bytes, NULL, bytes);
+  os::commit_memory_or_exit(page, bytes, false, "Unable to commit memory serialization page");
+  log_info(os)("Memory Serialize Page address: " INTPTR_FORMAT, p2i(page));
+  os::set_memory_serialize_page((address)(page));
+}
+
+void SafepointMechanism::initialize() {
+  pd_initialize();              // platform hook for the polling setup
+  initialize_serialize_page();  // no-op when UseMembar is set
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/runtime/safepointMechanism.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_SAFEPOINTMECHANISM_HPP
+#define SHARE_VM_RUNTIME_SAFEPOINTMECHANISM_HPP
+
+#include "runtime/globals.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+#include "utilities/sizes.hpp"
+
+// This is the abstracted interface for the safepoint implementation
+class SafepointMechanism : public AllStatic {
+  enum PollingType {
+    _global_page_poll,
+    _thread_local_poll
+  };
+  static PollingType _polling_type;     // polling mode in use
+  static void* _poll_armed_value;       // value stored in a thread's polling word by arm_local_poll()
+  static void* _poll_disarmed_value;    // value stored in a thread's polling word by disarm_local_poll()
+  static void set_uses_thread_local_poll()            { _polling_type     = _thread_local_poll; }
+
+  static void* poll_armed_value()                     { return _poll_armed_value; }
+  static void* poll_disarmed_value()                  { return _poll_disarmed_value; }
+
+  static inline bool local_poll_armed(JavaThread* thread);
+
+  static inline bool local_poll(Thread* thread);
+  static inline bool global_poll();
+
+  static inline void block_if_requested_local_poll(JavaThread *thread);
+
+  static void default_initialize();
+  static void initialize_serialize_page();
+
+  static void pd_initialize() NOT_AIX({ default_initialize(); });
+
+  // By adding 8 to the base address of the protected polling page we can differentiate
+  // between the armed and disarmed value by masking out this bit.
+  const static intptr_t _poll_bit = 8;
+public:
+  static intptr_t poll_bit() { return _poll_bit; }
+
+  static bool uses_global_page_poll() { return _polling_type == _global_page_poll; }
+  static bool uses_thread_local_poll() { return _polling_type == _thread_local_poll; }
+
+  static bool supports_thread_local_poll() {
+#ifdef THREAD_LOCAL_POLL
+    return true;
+#else
+    return false;
+#endif
+  }
+
+  // Call this method to see if this thread has a pending poll and appropriate action should be taken
+  static inline bool poll(Thread* thread);
+
+  // Blocks the thread if a safepoint (or, with thread-local polling, a handshake) has been requested
+  static inline void block_if_requested(JavaThread* thread);
+
+  static inline void arm_local_poll(JavaThread* thread);
+  static inline void disarm_local_poll(JavaThread* thread);
+
+  // Setup the selected safepoint mechanism
+  static void initialize();
+  static void initialize_header(JavaThread* thread);
+};
+
+#endif // SHARE_VM_RUNTIME_SAFEPOINTMECHANISM_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/runtime/safepointMechanism.inline.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_SAFEPOINTMECHANISM_INLINE_HPP
+#define SHARE_VM_RUNTIME_SAFEPOINTMECHANISM_INLINE_HPP
+
+#include "runtime/safepointMechanism.hpp"
+#include "runtime/safepoint.hpp"
+#include "runtime/thread.inline.hpp"
+
+bool SafepointMechanism::local_poll_armed(JavaThread* thread) {
+  // The poll counts as armed when the poll bit is set in the thread's polling word.
+  return mask_bits_are_true(reinterpret_cast<intptr_t>(thread->get_polling_page()), poll_bit());
+}
+
+bool SafepointMechanism::global_poll() {
+  return SafepointSynchronize::do_call_back(); // answered purely from the global safepoint state
+}
+
+bool SafepointMechanism::local_poll(Thread* thread) {
+  // The armed check is only defined for JavaThreads; for a poll on
+  // any other thread we can only check the global state.
+  if (!thread->is_Java_thread()) {
+    return global_poll();
+  }
+  return local_poll_armed((JavaThread*)thread);
+}
+
+bool SafepointMechanism::poll(Thread* thread) {
+  // Pick the check that matches the polling mode in use.
+  if (!uses_thread_local_poll()) {
+    return global_poll();
+  }
+  return local_poll(thread);
+}
+
+void SafepointMechanism::block_if_requested_local_poll(JavaThread *thread) {
+  // Load acquire of the polling word: it is read before the handshake
+  // operation and the global safepoint state that are examined below.
+  if (!local_poll_armed(thread)) {
+    return;
+  }
+  // We could be armed for either a handshake operation or a safepoint; a pending handshake is checked first.
+  if (thread->has_handshake()) {
+    thread->handshake_process_by_self();
+  } else if (global_poll()) {
+    SafepointSynchronize::block(thread);
+  }
+}
+
+void SafepointMechanism::block_if_requested(JavaThread *thread) {
+  if (uses_thread_local_poll()) {
+    block_if_requested_local_poll(thread);
+    return;
+  }
+  // Without a per-thread poll the request could be for a handshake or for a safepoint.
+  if (global_poll()) {
+    SafepointSynchronize::block(thread);
+  }
+}
+
+void SafepointMechanism::arm_local_poll(JavaThread* thread) {
+  thread->set_polling_page(poll_armed_value()); // publishes the armed value as the thread's polling word
+}
+
+void SafepointMechanism::disarm_local_poll(JavaThread* thread) {
+  thread->set_polling_page(poll_disarmed_value()); // publishes the disarmed value as the thread's polling word
+}
+
+#endif // SHARE_VM_RUNTIME_SAFEPOINTMECHANISM_INLINE_HPP
--- a/src/hotspot/share/runtime/thread.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/thread.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -65,6 +65,7 @@
 #include "runtime/deoptimization.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/globals.hpp"
+#include "runtime/handshake.hpp"
 #include "runtime/init.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/java.hpp"
@@ -77,6 +78,7 @@
 #include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/safepoint.hpp"
+#include "runtime/safepointMechanism.inline.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/statSampler.hpp"
 #include "runtime/stubRoutines.hpp"
@@ -1494,6 +1496,10 @@
   _popframe_preserved_args_size = 0;
   _frames_to_pop_failed_realloc = 0;
 
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    SafepointMechanism::initialize_header(this);
+  }
+
   pd_initialize();
 }
 
@@ -1910,6 +1916,11 @@
 
   // Remove from list of active threads list, and notify VM thread if we are the last non-daemon thread
   Threads::remove(this);
+
+  // If someone set a handshake on us just as we entered the exit path, we simply cancel it.
+  if (ThreadLocalHandshakes) {
+    cancel_handshake();
+  }
 }
 
 #if INCLUDE_ALL_GCS
@@ -2372,11 +2383,7 @@
     InterfaceSupport::serialize_thread_state_with_handler(thread);
   }
 
-  if (SafepointSynchronize::do_call_back()) {
-    // If we are safepointing, then block the caller which may not be
-    // the same as the target thread (see above).
-    SafepointSynchronize::block(curJT);
-  }
+  SafepointMechanism::block_if_requested(curJT);
 
   if (thread->is_deopt_suspend()) {
     thread->clear_deopt_suspend();
@@ -3551,6 +3558,8 @@
   // Timing (must come after argument parsing)
   TraceTime timer("Create VM", TRACETIME_LOG(Info, startuptime));
 
+  SafepointMechanism::initialize();
+
   // Initialize the os module after parsing the args
   jint os_init_2_result = os::init_2();
   if (os_init_2_result != JNI_OK) return os_init_2_result;
--- a/src/hotspot/share/runtime/thread.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/thread.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -31,6 +31,7 @@
 #include "oops/oop.hpp"
 #include "prims/jvmtiExport.hpp"
 #include "runtime/frame.hpp"
+#include "runtime/handshake.hpp"
 #include "runtime/javaFrameAnchor.hpp"
 #include "runtime/jniHandles.hpp"
 #include "runtime/mutexLocker.hpp"
@@ -271,6 +272,8 @@
   friend class PauseNoSafepointVerifier;
   friend class GCLocker;
 
+  volatile void* _polling_page;                 // Thread local polling page
+
   ThreadLocalAllocBuffer _tlab;                 // Thread-local eden
   jlong _allocated_bytes;                       // Cumulative number of bytes allocated on
                                                 // the Java heap
@@ -549,6 +552,8 @@
   uintptr_t        _self_raw_id;      // used by get_thread (mutable)
   int              _lgrp_id;
 
+  volatile void** polling_page_addr() { return &_polling_page; } // address of the thread-local polling word
+
  public:
   // Stack overflow support
   address stack_base() const           { assert(_stack_base != NULL,"Sanity check"); return _stack_base; }
@@ -617,6 +622,8 @@
   static ByteSize stack_base_offset()            { return byte_offset_of(Thread, _stack_base); }
   static ByteSize stack_size_offset()            { return byte_offset_of(Thread, _stack_size); }
 
+  static ByteSize polling_page_offset()          { return byte_offset_of(Thread, _polling_page); } // byte offset of _polling_page within Thread
+
 #define TLAB_FIELD_OFFSET(name) \
   static ByteSize tlab_##name##_offset()         { return byte_offset_of(Thread, _tlab) + ThreadLocalAllocBuffer::name##_offset(); }
 
@@ -1135,6 +1142,33 @@
   bool do_not_unlock_if_synchronized()             { return _do_not_unlock_if_synchronized; }
   void set_do_not_unlock_if_synchronized(bool val) { _do_not_unlock_if_synchronized = val; }
 
+  inline void set_polling_page(void* poll_value);
+  inline volatile void* get_polling_page();
+
+ private:
+  // Support for thread handshake operations
+  HandshakeState _handshake;
+ public:
+  void set_handshake_operation(HandshakeOperation* op) { // forwards op to this thread's HandshakeState
+    _handshake.set_operation(this, op);
+  }
+
+  bool has_handshake() const { // true when this thread's HandshakeState reports an operation
+    return _handshake.has_operation();
+  }
+
+  void cancel_handshake() { // forwards to HandshakeState::cancel for this thread
+    _handshake.cancel(this);
+  }
+
+  void handshake_process_by_self() { // forwards to HandshakeState::process_by_self for this thread
+    _handshake.process_by_self(this);
+  }
+
+  void handshake_process_by_vmthread() { // forwards to HandshakeState::process_by_vmthread for this thread
+    _handshake.process_by_vmthread(this);
+  }
+
   // Suspend/resume support for JavaThread
  private:
   inline void set_ext_suspended();
--- a/src/hotspot/share/runtime/thread.inline.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/thread.inline.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -163,4 +163,16 @@
   return _stack_guard_state == stack_guard_enabled;
 }
 
+// The release makes sure this store is done after storing the handshake
+// operation or the global state.
+inline void JavaThread::set_polling_page(void* poll_value) {
+  OrderAccess::release_store(polling_page_addr(), poll_value);
+}
+
+// The acquire makes sure the read of the polling page is done before
+// reading the handshake operation or the global state.
+inline volatile void* JavaThread::get_polling_page() {
+  return OrderAccess::load_acquire(polling_page_addr());
+}
+
 #endif // SHARE_VM_RUNTIME_THREAD_INLINE_HPP
--- a/src/hotspot/share/runtime/timer.cpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/timer.cpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,6 +37,11 @@
   return counter_to_seconds(counter) * 1000.0;
 }
 
+jlong TimeHelper::millis_to_counter(jlong millis) { // converts a duration in milliseconds to os elapsed-counter ticks
+  const jlong freq = os::elapsed_frequency(); // ticks per second; not pre-divided, so nothing is truncated up front
+  return (millis / MILLIUNITS) * freq + ((millis % MILLIUNITS) * freq) / MILLIUNITS; // whole seconds + sub-second rest
+}
+
 elapsedTimer::elapsedTimer(jlong time, jlong timeUnitsPerSecond) {
   _active = false;
   jlong osTimeUnitsPerSecond = os::elapsed_frequency();
--- a/src/hotspot/share/runtime/timer.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/timer.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -93,6 +93,7 @@
  public:
   static double counter_to_seconds(jlong counter);
   static double counter_to_millis(jlong counter);
+  static jlong millis_to_counter(jlong millis);
 };
 
 #endif // SHARE_VM_RUNTIME_TIMER_HPP
--- a/src/hotspot/share/runtime/vm_operations.hpp	Sat Nov 11 01:21:09 2017 +0100
+++ b/src/hotspot/share/runtime/vm_operations.hpp	Thu Aug 31 10:00:28 2017 +0200
@@ -69,6 +69,9 @@
   template(G1CollectFull)                         \
   template(G1CollectForAllocation)                \
   template(G1IncCollectionPause)                  \
+  template(HandshakeOneThread)                    \
+  template(HandshakeAllThreads)                   \
+  template(HandshakeFallback)                     \
   template(DestroyAllocationContext)              \
   template(EnableBiasedLocking)                   \
   template(RevokeBias)                            \
--- a/test/hotspot/jtreg/TEST.groups	Sat Nov 11 01:21:09 2017 +0100
+++ b/test/hotspot/jtreg/TEST.groups	Thu Aug 31 10:00:28 2017 +0200
@@ -33,6 +33,9 @@
 hotspot_runtime = \
   runtime
 
+hotspot_handshake = \
+  runtime/handshake
+
 hotspot_serviceability = \
   serviceability
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/handshake/HandshakeTransitionTest.java	Thu Aug 31 10:00:28 2017 +0200
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+import java.io.File;
+import java.nio.file.Paths;
+import java.time.Duration;
+
+import jdk.test.lib.process.ProcessTools;
+import jdk.test.lib.process.OutputAnalyzer;
+
+/*
+ * @test HandshakeTransitionTest
+ * @summary This does a sanity test of the poll in the native wrapper.
+ * @requires vm.debug
+ * @library /testlibrary /test/lib
+ * @build HandshakeTransitionTest
+ * @run main/native HandshakeTransitionTest
+ */
+
+public class HandshakeTransitionTest {
+    // Native sleep helper; the nested Test class loads the "HandshakeTransitionTest" library before calling it.
+    public static native void someTime(int ms);
+    // Driver: starts HandshakeTransitionTest$Test in a child JVM, then checks its exit value and output.
+    public static void main(String[] args) throws Exception {
+        String lib = System.getProperty("test.nativepath"); // passed to the child as java.library.path
+        ProcessBuilder pb =
+            ProcessTools.createJavaProcessBuilder(
+                    true,
+                    "-Djava.library.path=" + lib,
+                    "-XX:+SafepointALot",
+                    "-XX:GuaranteedSafepointInterval=20",
+                    "-Xlog:ergo*",
+                    "-XX:ParallelGCThreads=1",
+                    "-XX:ConcGCThreads=1",
+                    "-XX:CICompilerCount=2",
+                    "HandshakeTransitionTest$Test"); // main class executed in the child JVM
+
+
+        OutputAnalyzer output = ProcessTools.executeProcess(pb);
+        output.reportDiagnosticSummary();
+        output.shouldHaveExitValue(0);
+        output.stdoutShouldContain("JOINED"); // printed by Test.main only after every worker was joined
+    }
+    // Child-JVM workload: 64 threads that each call the native someTime() testLoops times.
+    static class Test implements Runnable {
+        final static int testLoops = 2000; // native calls made by each worker thread
+        final static int testSleep = 1; // milliseconds passed to someTime() on each call
+
+        public static void main(String[] args) throws Exception {
+            System.loadLibrary("HandshakeTransitionTest");
+            Test test = new Test(); // one Runnable instance shared by all workers
+            Thread[] threads = new Thread[64];
+            for (int i = 0; i<threads.length ; i++) {
+                threads[i] = new Thread(test);
+                threads[i].start();
+            }
+            for (Thread t : threads) {
+                t.join();
+            }
+            System.out.println("JOINED"); // marker the parent process looks for on stdout
+        }
+
+        @Override
+        public void run() {
+            try {
+                for (int i = 0; i<testLoops ; i++) {
+                    someTime(testSleep);
+                }
+            } catch (Exception e) {
+                System.out.println(e.getMessage());
+                System.exit(1); // non-zero exit makes the parent's shouldHaveExitValue(0) fail
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/handshake/HandshakeWalkExitTest.java	Thu Aug 31 10:00:28 2017 +0200
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test HandshakeWalkExitTest
+ * @summary This test tries to stress the handshakes with new and exiting threads
+ * @library /testlibrary /test/lib
+ * @build HandshakeWalkExitTest
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI HandshakeWalkExitTest
+ */
+
+import jdk.test.lib.Asserts;
+import sun.hotspot.WhiteBox;
+
+public class HandshakeWalkExitTest  implements Runnable {
+    // Worker body is intentionally empty: every worker thread exits right after it is started.
+    @Override
+    public void run() {
+    }
+
+    // Set by main() once all workers have been started; polled by the handshaking thread.
+    static volatile boolean exit_now = false;
+
+    public static void main(String... args) throws Exception {
+        int testRuns = 100;    // number of batches of worker threads
+        int testThreads = 500; // worker threads started per batch
+
+        HandshakeWalkExitTest test = new HandshakeWalkExitTest();
+
+        // Handshaker: keeps requesting an all-threads stack walk (null target, all_threads == true)
+        // while the main thread concurrently starts short-lived workers.
+        Runnable hser = new Runnable(){
+            public void run(){
+                WhiteBox wb = WhiteBox.getWhiteBox();
+                while(!exit_now) {
+                    wb.handshakeWalkStack(null, true);
+                    try { Thread.sleep(1); } catch(Exception e) {} // best effort pause; the loop re-checks exit_now
+                }
+            }
+        };
+        Thread hst = new Thread(hser);
+        hst.start();
+        for (int k = 0; k<testRuns ; k++) {
+            Thread[] threads = new Thread[testThreads]; // workers are started but never joined
+            for (int i = 0; i<threads.length ; i++) {
+                threads[i] = new Thread(test);
+                threads[i].start();
+            }
+        }
+        exit_now = true; // volatile write stops the handshaker loop
+        hst.join();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/handshake/HandshakeWalkStackFallbackTest.java	Thu Aug 31 10:00:28 2017 +0200
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test HandshakeWalkStackFallbackTest
+ * @summary This tests the global safepoint fallback path for handshakes
+ * @library /testlibrary /test/lib
+ * @build HandshakeWalkStackTest
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:-ThreadLocalHandshakes HandshakeWalkStackTest
+ */
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/handshake/HandshakeWalkStackTest.java	Thu Aug 31 10:00:28 2017 +0200
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test HandshakeWalkStackTest
+ * @library /testlibrary /test/lib
+ * @build HandshakeWalkStackTest
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI HandshakeWalkStackTest
+ */
+
+import jdk.test.lib.Asserts;
+import sun.hotspot.WhiteBox;
+
+public class HandshakeWalkStackTest {
+    // Entry point; an optional first argument overrides the default of 3 iterations.
+    public static void main(String... args) throws Exception {
+        int iterations = 3;
+        if (args.length > 0) {
+            iterations = Integer.parseInt(args[0]);
+        }
+        test(iterations);
+    }
+    // Starts three helper threads, then handshakes each of them and the current thread in turn.
+    private static void test(int iterations) throws Exception {
+        Thread loop_thread  = new Thread(() -> run_loop(create_list())); // spins over a cyclic list
+        Thread alloc_thread = new Thread(() -> run_alloc());             // allocates in a loop
+        Thread wait_thread  = new Thread(() -> run_wait(new Object() {})); // waits on a private lock
+        loop_thread.start();
+        alloc_thread.start();
+        wait_thread.start();
+
+        WhiteBox wb = WhiteBox.getWhiteBox();
+        int walked = 0; // value returned by handshakeWalkStack; the asserts treat it as a count of walked stacks
+        for (int i = 0; i < iterations; i++) {
+            System.out.println("Iteration " + i);
+            System.out.flush();
+            Thread.sleep(200);
+            walked = wb.handshakeWalkStack(loop_thread, false);
+            Asserts.assertEQ(walked, 1, "Must have walked one thread stack");
+            Thread.sleep(200);
+            walked = wb.handshakeWalkStack(alloc_thread, false);
+            Asserts.assertEQ(walked, 1, "Must have walked one thread stack");
+            Thread.sleep(200);
+            walked = wb.handshakeWalkStack(wait_thread, false);
+            Asserts.assertEQ(walked, 1, "Must have walked one thread stack");
+            Thread.sleep(200);
+            walked = wb.handshakeWalkStack(Thread.currentThread(), false);
+            Asserts.assertEQ(walked, 1, "Must have walked one thread stack");
+        }
+        Thread.sleep(200);
+        walked = wb.handshakeWalkStack(null, true); // null target with all_threads == true
+        Asserts.assertGT(walked, 4, "Must have walked more than four thread stacks"); // message now matches the > 4 check
+    }
+    // Minimal singly linked node used to build the cyclic list walked by run_loop().
+    static class List {
+        List next;
+
+        List(List next) {
+            this.next = next;
+        }
+    }
+    // Builds a five-node cycle: head -> elem4 -> elem3 -> elem2 -> elem -> head.
+    public static List create_list() {
+        List head = new List(null);
+        List elem = new List(head);
+        List elem2 = new List(elem);
+        List elem3 = new List(elem2);
+        List elem4 = new List(elem3);
+        head.next = elem4; // closes the cycle, so no node has a null next
+
+        return head;
+    }
+    // Follows next pointers until a null is found; never returns for the cycle from create_list().
+    public static void run_loop(List loop) {
+        while (loop.next != null) {
+            loop = loop.next;
+        }
+    }
+
+    public static byte[] array;
+    // Allocates 4096-byte arrays forever.
+    public static void run_alloc() {
+        while (true) {
+            // Write to public static to ensure the byte array escapes.
+            array = new byte[4096];
+        }
+    }
+    // Blocks in lock.wait(); returns once the wait ends or the thread is interrupted.
+    public static void run_wait(Object lock) {
+        synchronized (lock) {
+            try {
+                lock.wait();
+            } catch (InterruptedException ie) {}
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/handshake/libHandshakeTransitionTest.c	Thu Aug 31 10:00:28 2017 +0200
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <jni.h>
+
+#ifdef WINDOWS
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif
+
+JNIEXPORT void JNICALL Java_HandshakeTransitionTest_someTime /* JNI body of HandshakeTransitionTest.someTime(int) */
+  (JNIEnv *env, jclass jc, jint ms) /* env and jc are unused; ms is the time to sleep, in milliseconds */
+{
+#ifdef WINDOWS /* NOTE(review): relies on the build defining WINDOWS (compilers predefine _WIN32) - confirm */
+  Sleep(ms); /* Win32 Sleep() takes milliseconds */
+#else
+  usleep(ms*1000); /* usleep() takes microseconds, hence the scaling */
+#endif
+}
--- a/test/lib/sun/hotspot/WhiteBox.java	Sat Nov 11 01:21:09 2017 +0100
+++ b/test/lib/sun/hotspot/WhiteBox.java	Thu Aug 31 10:00:28 2017 +0200
@@ -531,6 +531,9 @@
   public native int addCompilerDirective(String compDirect);
   public native void removeCompilerDirective(int count);
 
+  // Handshakes
+  public native int handshakeWalkStack(Thread t, boolean all_threads);
+
   // Returns true on linux if library has the noexecstack flag set.
   public native boolean checkLibSpecifiesNoexecstack(String libfilename);
 }