--- a/hotspot/src/os/linux/vm/os_linux.cpp Tue Aug 27 18:55:33 2013 -0700
+++ b/hotspot/src/os/linux/vm/os_linux.cpp Thu Aug 29 06:53:16 2013 -0700
@@ -2767,7 +2767,19 @@
Linux::numa_interleave_memory(addr, bytes);
}
+// Argument value for numa_set_bind_policy(int). Passing 0 sets the bind
+// policy to MPOL_PREFERRED for the current thread.
+#define USE_MPOL_PREFERRED 0
+
void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
+ // To make NUMA and large pages more robust when both are enabled, we need to
+ // ease the requirements on where the memory should be allocated. MPOL_BIND is
+ // the default policy and forces memory to be allocated on the specified node.
+ // Changing this to MPOL_PREFERRED will prefer to allocate the memory on the
+ // specified node, but will not force it. Using this policy will prevent
+ // getting SIGBUS when trying to allocate large pages on NUMA nodes with no
+ // free large pages.
+ Linux::numa_set_bind_policy(USE_MPOL_PREFERRED);
Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
}
@@ -2869,6 +2881,8 @@
libnuma_dlsym(handle, "numa_tonode_memory")));
set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t,
libnuma_dlsym(handle, "numa_interleave_memory")));
+ set_numa_set_bind_policy(CAST_TO_FN_PTR(numa_set_bind_policy_func_t,
+ libnuma_dlsym(handle, "numa_set_bind_policy")));
if (numa_available() != -1) {
@@ -2935,6 +2949,7 @@
os::Linux::numa_available_func_t os::Linux::_numa_available;
os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
+os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
unsigned long* os::Linux::_numa_all_nodes;
bool os::pd_uncommit_memory(char* addr, size_t size) {
--- a/hotspot/src/os/linux/vm/os_linux.hpp Tue Aug 27 18:55:33 2013 -0700
+++ b/hotspot/src/os/linux/vm/os_linux.hpp Thu Aug 29 06:53:16 2013 -0700
@@ -235,6 +235,7 @@
typedef int (*numa_available_func_t)(void);
typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask);
+ typedef void (*numa_set_bind_policy_func_t)(int policy);
static sched_getcpu_func_t _sched_getcpu;
static numa_node_to_cpus_func_t _numa_node_to_cpus;
@@ -242,6 +243,7 @@
static numa_available_func_t _numa_available;
static numa_tonode_memory_func_t _numa_tonode_memory;
static numa_interleave_memory_func_t _numa_interleave_memory;
+ static numa_set_bind_policy_func_t _numa_set_bind_policy;
static unsigned long* _numa_all_nodes;
static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
@@ -250,6 +252,7 @@
static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; }
+ static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; }
static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
static int sched_getcpu_syscall(void);
public:
@@ -267,6 +270,11 @@
_numa_interleave_memory(start, size, _numa_all_nodes);
}
}
+ static void numa_set_bind_policy(int policy) { // Forwards to _numa_set_bind_policy; no-op while that pointer is NULL.
+ if (_numa_set_bind_policy != NULL) {
+ _numa_set_bind_policy(policy);
+ }
+ }
static int get_node_by_cpu(int cpu_id);
};