8016155: SIGBUS when running Kitchensink with ParallelScavenge and ParallelOld
Summary: When using NUMA and large pages we need to ease the requirement on which node the memory should be allocated on. To avoid the SIGBUS we now use the memory policy MPOL_PREFERRED, which prefers a certain node, instead of MPOL_BIND, which requires a certain node.
Reviewed-by: jmasa, pliden
Contributed-by: stefan.johansson@oracle.com
--- a/hotspot/src/os/linux/vm/os_linux.cpp Fri Aug 23 15:59:20 2013 -0700
+++ b/hotspot/src/os/linux/vm/os_linux.cpp Thu Aug 22 10:50:41 2013 +0200
@@ -2796,7 +2796,19 @@
Linux::numa_interleave_memory(addr, bytes);
}
+// Define for numa_set_bind_policy(int). Setting the argument to 0 will set the
+// bind policy to MPOL_PREFERRED for the current thread.
+#define USE_MPOL_PREFERRED 0
+
void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
+ // To make NUMA and large pages more robust when both enabled, we need to ease
+ // the requirements on where the memory should be allocated. MPOL_BIND is the
+ // default policy and it will force memory to be allocated on the specified
+ // node. Changing this to MPOL_PREFERRED will prefer to allocate the memory on
+ // the specified node, but will not force it. Using this policy will prevent
+ // getting SIGBUS when trying to allocate large pages on NUMA nodes with no
+ // free large pages.
+ Linux::numa_set_bind_policy(USE_MPOL_PREFERRED);
Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
}
@@ -2898,6 +2910,8 @@
libnuma_dlsym(handle, "numa_tonode_memory")));
set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t,
libnuma_dlsym(handle, "numa_interleave_memory")));
+ set_numa_set_bind_policy(CAST_TO_FN_PTR(numa_set_bind_policy_func_t,
+ libnuma_dlsym(handle, "numa_set_bind_policy")));
if (numa_available() != -1) {
@@ -2964,6 +2978,7 @@
os::Linux::numa_available_func_t os::Linux::_numa_available;
os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
+os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
unsigned long* os::Linux::_numa_all_nodes;
bool os::pd_uncommit_memory(char* addr, size_t size) {
--- a/hotspot/src/os/linux/vm/os_linux.hpp Fri Aug 23 15:59:20 2013 -0700
+++ b/hotspot/src/os/linux/vm/os_linux.hpp Thu Aug 22 10:50:41 2013 +0200
@@ -221,6 +221,7 @@
typedef int (*numa_available_func_t)(void);
typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask);
+ typedef void (*numa_set_bind_policy_func_t)(int policy);
static sched_getcpu_func_t _sched_getcpu;
static numa_node_to_cpus_func_t _numa_node_to_cpus;
@@ -228,6 +229,7 @@
static numa_available_func_t _numa_available;
static numa_tonode_memory_func_t _numa_tonode_memory;
static numa_interleave_memory_func_t _numa_interleave_memory;
+ static numa_set_bind_policy_func_t _numa_set_bind_policy;
static unsigned long* _numa_all_nodes;
static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
@@ -236,6 +238,7 @@
static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; }
+ static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; }
static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
static int sched_getcpu_syscall(void);
public:
@@ -253,6 +256,11 @@
_numa_interleave_memory(start, size, _numa_all_nodes);
}
}
+ static void numa_set_bind_policy(int policy) {
+ if (_numa_set_bind_policy != NULL) {
+ _numa_set_bind_policy(policy);
+ }
+ }
static int get_node_by_cpu(int cpu_id);
};