--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp Fri Nov 24 17:19:47 2017 +0000
@@ -494,42 +494,6 @@
}
}
-// Rather than take a segfault when the polling page is protected,
-// explicitly check for a safepoint in progress and if there is one,
-// fake a call to the handler as if a segfault had been caught.
-void LIR_Assembler::poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info) {
- __ mov(rscratch1, SafepointSynchronize::address_of_state());
- __ ldrb(rscratch1, Address(rscratch1));
- Label nope, poll;
- __ cbz(rscratch1, nope);
- __ block_comment("safepoint");
- __ enter();
- __ push(0x3, sp); // r0 & r1
- __ push(0x3ffffffc, sp); // integer registers except lr & sp & r0 & r1
- __ adr(r0, poll);
- __ str(r0, Address(rthread, JavaThread::saved_exception_pc_offset()));
- __ mov(rscratch1, CAST_FROM_FN_PTR(address, SharedRuntime::get_poll_stub));
- __ blrt(rscratch1, 1, 0, 1);
- __ maybe_isb();
- __ pop(0x3ffffffc, sp); // integer registers except lr & sp & r0 & r1
- __ mov(rscratch1, r0);
- __ pop(0x3, sp); // r0 & r1
- __ leave();
- __ br(rscratch1);
- address polling_page(os::get_polling_page());
- assert(os::is_poll_address(polling_page), "should be");
- unsigned long off;
- __ adrp(rscratch1, Address(polling_page, rtype), off);
- __ bind(poll);
- if (info)
- add_debug_info_for_branch(info); // This isn't just debug info:
- // it's the oop map
- else
- __ code_section()->relocate(pc(), rtype);
- __ ldrw(zr, Address(rscratch1, off));
- __ bind(nope);
-}
-
void LIR_Assembler::return_op(LIR_Opr result) {
assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,");
@@ -549,11 +513,9 @@
address polling_page(os::get_polling_page());
guarantee(info != NULL, "Shouldn't be NULL");
assert(os::is_poll_address(polling_page), "should be");
- unsigned long off;
- __ adrp(rscratch1, Address(polling_page, relocInfo::poll_type), off);
- assert(off == 0, "must be");
+ __ get_polling_page(rscratch1, polling_page, relocInfo::poll_type);
add_debug_info_for_branch(info); // This isn't just debug info:
- // it's the oop map
+ // it's the oop map
__ read_polling_page(rscratch1, relocInfo::poll_type);
return __ offset();
}
--- a/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp Fri Nov 24 17:19:47 2017 +0000
@@ -51,4 +51,6 @@
#define SUPPORT_RESERVED_STACK_AREA
+#define THREAD_LOCAL_POLL
+
#endif // CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp Fri Nov 24 17:19:47 2017 +0000
@@ -79,7 +79,7 @@
// Clear short arrays bigger than one word in an arch-specific way
define_pd_global(intx, InitArrayShortSize, BytesPerLong);
-define_pd_global(bool, ThreadLocalHandshakes, false);
+define_pd_global(bool, ThreadLocalHandshakes, true);
#if defined(COMPILER1) || defined(COMPILER2)
define_pd_global(intx, InlineSmallCode, 1000);
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp Fri Nov 24 17:19:47 2017 +0000
@@ -30,12 +30,13 @@
#include "logging/log.hpp"
#include "oops/arrayOop.hpp"
#include "oops/markOop.hpp"
+#include "oops/method.hpp"
#include "oops/methodData.hpp"
-#include "oops/method.hpp"
#include "prims/jvmtiExport.hpp"
#include "prims/jvmtiThreadState.hpp"
#include "runtime/basicLock.hpp"
#include "runtime/biasedLocking.hpp"
+#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.inline.hpp"
@@ -438,13 +439,26 @@
void InterpreterMacroAssembler::dispatch_base(TosState state,
address* table,
- bool verifyoop) {
+ bool verifyoop,
+ bool generate_poll) {
if (VerifyActivationFrameSize) {
Unimplemented();
}
if (verifyoop) {
verify_oop(r0, state);
}
+
+ Label safepoint;
+ address* const safepoint_table = Interpreter::safept_table(state);
+ bool needs_thread_local_poll = generate_poll &&
+ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table;
+
+ if (needs_thread_local_poll) {
+ NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
+ ldr(rscratch2, Address(rthread, Thread::polling_page_offset()));
+ tbnz(rscratch2, exact_log2(SafepointMechanism::poll_bit()), safepoint);
+ }
+
if (table == Interpreter::dispatch_table(state)) {
addw(rscratch2, rscratch1, Interpreter::distance_from_dispatch_table(state));
ldr(rscratch2, Address(rdispatch, rscratch2, Address::uxtw(3)));
@@ -453,10 +467,17 @@
ldr(rscratch2, Address(rscratch2, rscratch1, Address::uxtw(3)));
}
br(rscratch2);
+
+ if (needs_thread_local_poll) {
+ bind(safepoint);
+ lea(rscratch2, ExternalAddress((address)safepoint_table));
+ ldr(rscratch2, Address(rscratch2, rscratch1, Address::uxtw(3)));
+ br(rscratch2);
+ }
}
-void InterpreterMacroAssembler::dispatch_only(TosState state) {
- dispatch_base(state, Interpreter::dispatch_table(state));
+void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) {
+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);
}
void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
@@ -468,10 +489,10 @@
}
-void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
+void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {
// load next bytecode
ldrb(rscratch1, Address(pre(rbcp, step)));
- dispatch_base(state, Interpreter::dispatch_table(state));
+ dispatch_base(state, Interpreter::dispatch_table(state), /*verifyoop*/true, generate_poll);
}
void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
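The dispatch poll above folds the safepoint check into bytecode dispatch: when the thread-local poll word has the poll bit set, the stub dispatches through Interpreter::safept_table(state), whose entries stop at a safepoint before re-dispatching the same bytecode. A minimal C++ sketch of that table selection, assuming 8-byte table entries and a poll bit of 1 (pick_entry and entry_t are illustrative names, not HotSpot APIs):

    #include <cstdint>

    typedef void (*entry_t)();   // one generated entry point per bytecode

    // Sketch only: models the choice dispatch_base() emits as
    //   ldr  rscratch2, [rthread, #polling_page_offset]
    //   tbnz rscratch2, #log2(poll_bit), safepoint
    // followed by an indexed load from the chosen table and a br.
    entry_t pick_entry(uint64_t polling_word,
                       const entry_t* dispatch_table,   // Interpreter::dispatch_table(state)
                       const entry_t* safept_table,     // Interpreter::safept_table(state)
                       unsigned bytecode,
                       uint64_t poll_bit = 1) {         // assumed SafepointMechanism::poll_bit()
      const entry_t* table = (polling_word & poll_bit) ? safept_table : dispatch_table;
      return table[bytecode];                           // ldr rscratch2, [table, rscratch1, uxtw #3]
    }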
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp Fri Nov 24 17:19:47 2017 +0000
@@ -55,7 +55,8 @@
bool check_exceptions);
// base routine for all dispatches
- void dispatch_base(TosState state, address* table, bool verifyoop = true);
+ void dispatch_base(TosState state, address* table,
+ bool verifyoop = true, bool generate_poll = false);
public:
InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
@@ -165,12 +166,12 @@
void dispatch_prolog(TosState state, int step = 0);
void dispatch_epilog(TosState state, int step = 0);
// dispatch via rscratch1
- void dispatch_only(TosState state);
+ void dispatch_only(TosState state, bool generate_poll = false);
// dispatch normal table via rscratch1 (assume rscratch1 is loaded already)
void dispatch_only_normal(TosState state);
void dispatch_only_noverify(TosState state);
// load rscratch1 from [rbcp + step] and dispatch via rscratch1
- void dispatch_next(TosState state, int step = 0);
+ void dispatch_next(TosState state, int step = 0, bool generate_poll = false);
// load rscratch1 from [esi] and dispatch via rscratch1 and table
void dispatch_via (TosState state, address* table);
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp Fri Nov 24 17:19:47 2017 +0000
@@ -287,6 +287,40 @@
dsb(Assembler::SY);
}
+void MacroAssembler::safepoint_poll(Label& slow_path) {
+ if (SafepointMechanism::uses_thread_local_poll()) {
+ ldr(rscratch1, Address(rthread, Thread::polling_page_offset()));
+ tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
+ } else {
+ unsigned long offset;
+ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset);
+ ldrw(rscratch1, Address(rscratch1, offset));
+ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
+ cbnz(rscratch1, slow_path);
+ }
+}
+
+// Just like safepoint_poll, but use an acquiring load for thread-
+// local polling.
+//
+// We need an acquire here to ensure that any subsequent load of the
+// global SafepointSynchronize::_state flag is ordered after this load
+// of the local Thread::_polling page. We don't want this poll to
+// return false (i.e. not safepointing) and a later poll of the global
+// SafepointSynchronize::_state spuriously to return true.
+//
+// This is to avoid a race when we're in a native->Java transition
+// racing the code which wakes up from a safepoint.
+//
+void MacroAssembler::safepoint_poll_acquire(Label& slow_path) {
+ if (SafepointMechanism::uses_thread_local_poll()) {
+ lea(rscratch1, Address(rthread, Thread::polling_page_offset()));
+ ldar(rscratch1, rscratch1);
+ tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
+ } else {
+ safepoint_poll(slow_path);
+ }
+}
void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
// we must set sp to zero to clear frame
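The ordering argument in the comment above can be modelled with C++ atomics: the ldar must keep a later read of the global state from being satisfied before the per-thread poll word is read. A sketch under that assumption; _polling_word and _state stand in for Thread::_polling_page and SafepointSynchronize::_state, and the poll bit is taken to be bit 0:

    #include <atomic>
    #include <cstdint>

    std::atomic<uint64_t> _polling_word{0};   // armed by the VM: poll bit (assumed bit 0) set
    std::atomic<int>      _state{0};          // 0 == _not_synchronized

    // Model of safepoint_poll_acquire(): the acquire load (ldar) orders any
    // subsequent load of the global state after the load of the thread-local
    // word, so a disarmed local poll cannot be followed by a stale
    // "synchronizing" reading of the global flag.
    bool needs_slow_path() {
      uint64_t word = _polling_word.load(std::memory_order_acquire);  // ldar rscratch1, [addr]
      if (word & 1) {
        return true;                                    // tbnz rscratch1, #0, slow_path
      }
      return _state.load(std::memory_order_relaxed) != 0;  // the later global poll stays ordered
    }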
@@ -4336,15 +4370,26 @@
}
+// Move the address of the polling page into dest.
+void MacroAssembler::get_polling_page(Register dest, address page, relocInfo::relocType rtype) {
+ if (SafepointMechanism::uses_thread_local_poll()) {
+ ldr(dest, Address(rthread, Thread::polling_page_offset()));
+ } else {
+ unsigned long off;
+ adrp(dest, Address(page, rtype), off);
+ assert(off == 0, "polling page must be page aligned");
+ }
+}
+
+// Move the address of the polling page into r, then read the polling
+// page.
address MacroAssembler::read_polling_page(Register r, address page, relocInfo::relocType rtype) {
- unsigned long off;
- adrp(r, Address(page, rtype), off);
- InstructionMark im(this);
- code_section()->relocate(inst_mark(), rtype);
- ldrw(zr, Address(r, off));
- return inst_mark();
-}
-
+ get_polling_page(r, page, rtype);
+ return read_polling_page(r, rtype);
+}
+
+// Read the polling page. The address of the polling page must
+// already be in r.
address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
InstructionMark im(this);
code_section()->relocate(inst_mark(), rtype);
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp Fri Nov 24 17:19:47 2017 +0000
@@ -97,6 +97,9 @@
virtual void check_and_handle_popframe(Register java_thread);
virtual void check_and_handle_earlyret(Register java_thread);
+ void safepoint_poll(Label& slow_path);
+ void safepoint_poll_acquire(Label& slow_path);
+
// Biased locking support
// lock_reg and obj_reg must be loaded up with the appropriate values.
// swap_reg is killed.
@@ -1199,6 +1202,7 @@
address read_polling_page(Register r, address page, relocInfo::relocType rtype);
address read_polling_page(Register r, relocInfo::relocType rtype);
+ void get_polling_page(Register dest, address page, relocInfo::relocType rtype);
// CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic.
void update_byte_crc32(Register crc, Register val, Register table);
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp Fri Nov 24 17:19:47 2017 +0000
@@ -245,6 +245,11 @@
// mov(reg, polling_page);
// ldr(zr, [reg, #offset]);
//
+ // or
+ //
+ // ldr(reg, [rthread, #offset]);
+ // ldr(zr, [reg, #offset]);
+ //
// however, we cannot rely on the polling page address load always
// directly preceding the read from the page. C1 does that but C2
// has to do the load and read as two independent instruction
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp Fri Nov 24 17:19:47 2017 +0000
@@ -1952,7 +1952,7 @@
__ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
// Force this write out before the read below
- __ dmb(Assembler::SY);
+ __ dmb(Assembler::ISH);
} else {
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);
@@ -1970,13 +1970,7 @@
// check for safepoint operation in progress and/or pending suspend requests
Label safepoint_in_progress, safepoint_in_progress_done;
{
- assert(SafepointSynchronize::_not_synchronized == 0, "fix this code");
- unsigned long offset;
- __ adrp(rscratch1,
- ExternalAddress((address)SafepointSynchronize::address_of_state()),
- offset);
- __ ldrw(rscratch1, Address(rscratch1, offset));
- __ cbnzw(rscratch1, safepoint_in_progress);
+ __ safepoint_poll_acquire(safepoint_in_progress);
__ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
__ cbnzw(rscratch1, safepoint_in_progress);
__ bind(safepoint_in_progress_done);
@@ -2932,8 +2926,11 @@
if (!cause_return) {
// overwrite the return address pushed by save_live_registers
- __ ldr(c_rarg0, Address(rthread, JavaThread::saved_exception_pc_offset()));
- __ str(c_rarg0, Address(rfp, wordSize));
+ // Additionally, r20 is a callee-saved register so we can look at
+ // it later to determine if someone changed the return address for
+ // us!
+ __ ldr(r20, Address(rthread, JavaThread::saved_exception_pc_offset()));
+ __ str(r20, Address(rfp, wordSize));
}
// Do the call
@@ -2968,11 +2965,40 @@
// No exception case
__ bind(noException);
+ Label no_adjust, bail;
+ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
+ // If our stashed return pc was modified by the runtime we avoid touching it
+ __ ldr(rscratch1, Address(rfp, wordSize));
+ __ cmp(r20, rscratch1);
+ __ br(Assembler::NE, no_adjust);
+
+#ifdef ASSERT
+ // Verify the correct encoding of the poll we're about to skip.
+ // See NativeInstruction::is_ldrw_to_zr()
+ __ ldrw(rscratch1, Address(r20));
+ __ ubfx(rscratch2, rscratch1, 22, 10);
+ __ cmpw(rscratch2, 0b1011100101);
+ __ br(Assembler::NE, bail);
+ __ ubfx(rscratch2, rscratch1, 0, 5);
+ __ cmpw(rscratch2, 0b11111);
+ __ br(Assembler::NE, bail);
+#endif
+ // Adjust return pc forward to step over the safepoint poll instruction
+ __ add(r20, r20, NativeInstruction::instruction_size);
+ __ str(r20, Address(rfp, wordSize));
+ }
+
+ __ bind(no_adjust);
// Normal exit, restore registers and exit.
RegisterSaver::restore_live_registers(masm, save_vectors);
__ ret(lr);
+#ifdef ASSERT
+ __ bind(bail);
+ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
+#endif
+
// Make sure all code is generated
masm->flush();
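The ASSERT block above re-checks, in generated code, the same encoding test NativeInstruction::is_ldrw_to_zr() performs: the saved pc must point at a 32-bit "ldr wzr, [Xn]" poll before it is stepped over. A host-side sketch of that decode (looks_like_ldrw_to_zr is an illustrative name; the sample encoding 0xb940011f, "ldr wzr, [x8]", is given for illustration):

    #include <cstdint>

    // Sketch of the test mirrored by the ubfx/cmpw pairs above:
    // bits [31:22] select LDR (immediate, unsigned offset, 32-bit) and
    // bits [4:0] require the destination register to be zr (encoded as 31).
    static bool looks_like_ldrw_to_zr(uint32_t insn) {
      return ((insn >> 22) & 0x3ff) == 0b1011100101    // ubfx rscratch2, insn, 22, 10
          && (insn & 0x1f)          == 0b11111;        // ubfx rscratch2, insn, 0, 5
    }

    // e.g. looks_like_ldrw_to_zr(0xb940011f) is true: "ldr wzr, [x8]",
    // the shape of the poll emitted via read_polling_page(rscratch1, ...).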
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp Fri Nov 24 17:19:47 2017 +0000
@@ -967,12 +967,7 @@
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
- ExternalAddress state(SafepointSynchronize::address_of_state());
- unsigned long offset;
- __ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset);
- __ ldrw(rscratch1, Address(rscratch1, offset));
- assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
- __ cbnz(rscratch1, slow_path);
+ __ safepoint_poll(slow_path);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
@@ -986,6 +981,7 @@
__ ldrw(val, Address(esp, 0)); // byte value
__ ldrw(crc, Address(esp, wordSize)); // Initial CRC
+ unsigned long offset;
__ adrp(tbl, ExternalAddress(StubRoutines::crc_table_addr()), offset);
__ add(tbl, tbl, offset);
@@ -1020,12 +1016,7 @@
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
- ExternalAddress state(SafepointSynchronize::address_of_state());
- unsigned long offset;
- __ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset);
- __ ldrw(rscratch1, Address(rscratch1, offset));
- assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
- __ cbnz(rscratch1, slow_path);
+ __ safepoint_poll(slow_path);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
@@ -1375,7 +1366,7 @@
if (os::is_MP()) {
if (UseMembar) {
// Force this write out before the read below
- __ dsb(Assembler::SY);
+ __ dmb(Assembler::ISH);
} else {
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
@@ -1387,16 +1378,8 @@
// check for safepoint operation in progress and/or pending suspend requests
{
- Label Continue;
- {
- unsigned long offset;
- __ adrp(rscratch2, SafepointSynchronize::address_of_state(), offset);
- __ ldrw(rscratch2, Address(rscratch2, offset));
- }
- assert(SafepointSynchronize::_not_synchronized == 0,
- "SafepointSynchronize::_not_synchronized");
- Label L;
- __ cbnz(rscratch2, L);
+ Label L, Continue;
+ __ safepoint_poll_acquire(L);
__ ldrw(rscratch2, Address(rthread, JavaThread::suspend_flags_offset()));
__ cbz(rscratch2, Continue);
__ bind(L);
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp Fri Nov 24 17:19:47 2017 +0000
@@ -1717,7 +1717,7 @@
__ push_i(r1);
// Adjust the bcp by the 16-bit displacement in r2
__ add(rbcp, rbcp, r2);
- __ dispatch_only(vtos);
+ __ dispatch_only(vtos, /*generate_poll*/true);
return;
}
@@ -1833,7 +1833,7 @@
// continue with the bytecode @ target
// rscratch1: target bytecode
// rbcp: target bcp
- __ dispatch_only(vtos);
+ __ dispatch_only(vtos, /*generate_poll*/true);
if (UseLoopCounter) {
if (ProfileInterpreter) {
@@ -1973,7 +1973,7 @@
__ ldr(rbcp, Address(rmethod, Method::const_offset()));
__ lea(rbcp, Address(rbcp, r1));
__ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset()));
- __ dispatch_next(vtos);
+ __ dispatch_next(vtos, 0, /*generate_poll*/true);
}
void TemplateTable::wide_ret() {
@@ -1984,7 +1984,7 @@
__ ldr(rbcp, Address(rmethod, Method::const_offset()));
__ lea(rbcp, Address(rbcp, r1));
__ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset()));
- __ dispatch_next(vtos);
+ __ dispatch_next(vtos, 0, /*generate_poll*/true);
}
@@ -2014,7 +2014,7 @@
__ rev32(r3, r3);
__ load_unsigned_byte(rscratch1, Address(rbcp, r3, Address::sxtw(0)));
__ add(rbcp, rbcp, r3, ext::sxtw);
- __ dispatch_only(vtos);
+ __ dispatch_only(vtos, /*generate_poll*/true);
// handle default
__ bind(default_case);
__ profile_switch_default(r0);
@@ -2064,7 +2064,7 @@
__ rev32(r3, r3);
__ add(rbcp, rbcp, r3, ext::sxtw);
__ ldrb(rscratch1, Address(rbcp, 0));
- __ dispatch_only(vtos);
+ __ dispatch_only(vtos, /*generate_poll*/true);
}
void TemplateTable::fast_binaryswitch() {
@@ -2162,7 +2162,7 @@
__ rev32(j, j);
__ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0)));
__ lea(rbcp, Address(rbcp, j, Address::sxtw(0)));
- __ dispatch_only(vtos);
+ __ dispatch_only(vtos, /*generate_poll*/true);
// default case -> j = default offset
__ bind(default_case);
@@ -2171,7 +2171,7 @@
__ rev32(j, j);
__ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0)));
__ lea(rbcp, Address(rbcp, j, Address::sxtw(0)));
- __ dispatch_only(vtos);
+ __ dispatch_only(vtos, /*generate_poll*/true);
}