8050147: StoreLoad barrier interferes with stack usages
Reviewed-by: jrose, kvn, drchase
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Thu Sep 04 13:11:25 2014 +0400
@@ -26,6 +26,7 @@
#define CPU_X86_VM_ASSEMBLER_X86_HPP
#include "asm/register.hpp"
+#include "vm_version_x86.hpp"
class BiasedLockingCounters;
@@ -1292,14 +1293,34 @@
if (order_constraint & StoreLoad) {
// All usable chips support "locked" instructions which suffice
// as barriers, and are much faster than the alternative of
- // using cpuid instruction. We use here a locked add [esp],0.
+ // using cpuid instruction. We use here a locked add [esp-C],0.
// This is conveniently otherwise a no-op except for blowing
- // flags.
+ // flags, and introducing a false dependency on target memory
+ // location. We can't do anything with flags, but we can avoid
+ // memory dependencies in the current method by locked-adding
+ // somewhere else on the stack. Doing [esp+C] will collide with
+ // something on stack in current method, hence we go for [esp-C].
+ // It is convenient since it is almost always in data cache, for
+ // any small C. We need to step back from SP to avoid data
+ // dependencies with other things below SP (callee-saves, for
+ // example). Without a clear way to figure out the minimal safe
+ // distance from SP, it makes sense to step back the complete
+ // cache line, as this will also avoid possible second-order effects
+ // with locked ops against the cache line. Our choice of offset
+ // is bounded by x86 operand encoding, which should stay within
+ // [-128; +127] to have the 8-bit displacement encoding.
+ //
// Any change to this code may need to revisit other places in
// the code where this idiom is used, in particular the
// orderAccess code.
+
+ int offset = -VM_Version::L1_line_size();
+ if (offset < -128) {
+ offset = -128;
+ }
+
lock();
- addl(Address(rsp, 0), 0);// Assert the lock# signal here
+ addl(Address(rsp, offset), 0);// Assert the lock# signal here
}
}
}
--- a/hotspot/src/os_cpu/aix_ppc/vm/os_aix_ppc.cpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/os_cpu/aix_ppc/vm/os_aix_ppc.cpp Thu Sep 04 13:11:25 2014 +0400
@@ -563,3 +563,8 @@
assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
}
#endif
+
+int os::extra_bang_size_in_bytes() {
+ // PPC does not require an additional stack bang, so no extra bytes are reported.
+ return 0;
+}
--- a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Thu Sep 04 13:11:25 2014 +0400
@@ -1030,3 +1030,8 @@
void os::verify_stack_alignment() {
}
#endif
+
+int os::extra_bang_size_in_bytes() {
+ // JDK-8050147: the x86 StoreLoad barrier does a locked add up to one L1 cache line below SP, so bang that much extra.
+ return VM_Version::L1_line_size();
+}
--- a/hotspot/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp Thu Sep 04 13:11:25 2014 +0400
@@ -465,3 +465,8 @@
void os::verify_stack_alignment() {
}
#endif
+
+int os::extra_bang_size_in_bytes() {
+ // Zero does not require an additional stack bang, so no extra bytes are reported.
+ return 0;
+}
--- a/hotspot/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp Thu Sep 04 13:11:25 2014 +0400
@@ -612,3 +612,8 @@
assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
}
#endif
+
+int os::extra_bang_size_in_bytes() {
+ // PPC does not require an additional stack bang, so no extra bytes are reported.
+ return 0;
+}
--- a/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp Thu Sep 04 13:11:25 2014 +0400
@@ -752,3 +752,8 @@
void os::verify_stack_alignment() {
}
#endif
+
+int os::extra_bang_size_in_bytes() {
+ // SPARC does not require an additional stack bang, so no extra bytes are reported.
+ return 0;
+}
--- a/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp Thu Sep 04 13:11:25 2014 +0400
@@ -930,3 +930,8 @@
// keep the page mapped so CS limit isn't reduced.
#endif
}
+
+int os::extra_bang_size_in_bytes() {
+ // JDK-8050147: the x86 StoreLoad barrier does a locked add up to one L1 cache line below SP, so bang that much extra.
+ return VM_Version::L1_line_size();
+}
--- a/hotspot/src/os_cpu/linux_zero/vm/os_linux_zero.cpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/os_cpu/linux_zero/vm/os_linux_zero.cpp Thu Sep 04 13:11:25 2014 +0400
@@ -495,3 +495,8 @@
void os::verify_stack_alignment() {
}
#endif
+
+int os::extra_bang_size_in_bytes() {
+ // Zero does not require an additional stack bang, so no extra bytes are reported.
+ return 0;
+}
--- a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp Thu Sep 04 13:11:25 2014 +0400
@@ -774,3 +774,8 @@
void os::verify_stack_alignment() {
}
#endif
+
+int os::extra_bang_size_in_bytes() {
+ // SPARC does not require an additional stack bang, so no extra bytes are reported.
+ return 0;
+}
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Thu Sep 04 13:11:25 2014 +0400
@@ -918,3 +918,8 @@
#endif
}
#endif
+
+int os::extra_bang_size_in_bytes() {
+ // JDK-8050147: the x86 StoreLoad barrier does a locked add up to one L1 cache line below SP, so bang that much extra.
+ return VM_Version::L1_line_size();
+}
--- a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp Thu Sep 04 13:11:25 2014 +0400
@@ -639,3 +639,8 @@
#endif
}
#endif
+
+int os::extra_bang_size_in_bytes() {
+ // JDK-8050147: the x86 StoreLoad barrier does a locked add up to one L1 cache line below SP, so bang that much extra.
+ return VM_Version::L1_line_size();
+}
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp Thu Sep 04 13:11:25 2014 +0400
@@ -170,7 +170,7 @@
// removes the need to bang the stack in the deoptimization blob which
// in turn simplifies stack overflow handling.
int LIR_Assembler::bang_size_in_bytes() const {
- return MAX2(initial_frame_size_in_bytes(), _compilation->interpreter_frame_size());
+ return MAX2(initial_frame_size_in_bytes() + os::extra_bang_size_in_bytes(), _compilation->interpreter_frame_size());
}
void LIR_Assembler::emit_exception_entries(ExceptionInfoList* info_list) {
--- a/hotspot/src/share/vm/opto/compile.cpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/share/vm/opto/compile.cpp Thu Sep 04 13:11:25 2014 +0400
@@ -430,7 +430,7 @@
// removes the need to bang the stack in the deoptimization blob which
// in turn simplifies stack overflow handling.
int Compile::bang_size_in_bytes() const {
- return MAX2(_interpreter_frame_size, frame_size_in_bytes());
+ return MAX2(frame_size_in_bytes() + os::extra_bang_size_in_bytes(), _interpreter_frame_size);
}
// ============================================================================
--- a/hotspot/src/share/vm/runtime/os.hpp Wed Sep 03 14:39:13 2014 +0200
+++ b/hotspot/src/share/vm/runtime/os.hpp Thu Sep 04 13:11:25 2014 +0400
@@ -761,6 +761,9 @@
// Hook for os specific jvm options that we don't want to abort on seeing
static bool obsolete_option(const JavaVMOption *option);
+ // Number of extra bytes, beyond the callee frame size, that the stack bang must cover.
+ static int extra_bang_size_in_bytes();
+
// Extensions
#include "runtime/os_ext.hpp"