# HG changeset patch # User iveresov # Date 1228342737 28800 # Node ID b46d9f19bde27c2339157d9c6f72cffb90166a99 # Parent 5dddd195cc8648dc73d7fabf5d2deea4be7012a2 6779436: NUMA allocator: libnuma expects certain size of the buffer in numa_node_to_cpus() Summary: In os::Linux::rebuild_cpu_to_node_map() fix the size of the CPU bitmap. Fixed arithmetic in MutableNUMASpace::adaptive_chunk_size() that could cause overflows and underflows of the chunk_size variable. Reviewed-by: apetrusenko diff -r 5dddd195cc86 -r b46d9f19bde2 hotspot/src/os/linux/vm/os_linux.cpp --- a/hotspot/src/os/linux/vm/os_linux.cpp Mon Dec 01 23:25:24 2008 -0800 +++ b/hotspot/src/os/linux/vm/os_linux.cpp Wed Dec 03 14:18:57 2008 -0800 @@ -2272,7 +2272,9 @@ uncommit_memory(addr, bytes); } -void os::numa_make_global(char *addr, size_t bytes) { } +void os::numa_make_global(char *addr, size_t bytes) { + Linux::numa_interleave_memory(addr, bytes); +} void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) { Linux::numa_tonode_memory(addr, bytes, lgrp_hint); @@ -2314,7 +2316,7 @@ extern "C" void numa_warn(int number, char *where, ...) { } extern "C" void numa_error(char *where) { } -void os::Linux::libnuma_init() { +bool os::Linux::libnuma_init() { // sched_getcpu() should be in libc. set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t, dlsym(RTLD_DEFAULT, "sched_getcpu"))); @@ -2330,31 +2332,51 @@ dlsym(handle, "numa_available"))); set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t, dlsym(handle, "numa_tonode_memory"))); + set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t, + dlsym(handle, "numa_interleave_memory"))); + + if (numa_available() != -1) { + set_numa_all_nodes((unsigned long*)dlsym(handle, "numa_all_nodes")); // Create a cpu -> node mapping _cpu_to_node = new (ResourceObj::C_HEAP) GrowableArray(0, true); rebuild_cpu_to_node_map(); + return true; } } } + return false; } // rebuild_cpu_to_node_map() constructs a table mapping cpud id to node id. // The table is later used in get_node_by_cpu(). void os::Linux::rebuild_cpu_to_node_map() { - int cpu_num = os::active_processor_count(); + const size_t NCPUS = 32768; // Since the buffer size computation is very obscure + // in libnuma (possible values are starting from 16, + // and continuing up with every other power of 2, but less + // than the maximum number of CPUs supported by kernel), and + // is a subject to change (in libnuma version 2 the requirements + // are more reasonable) we'll just hardcode the number they use + // in the library. + const size_t BitsPerCLong = sizeof(long) * CHAR_BIT; + + size_t cpu_num = os::active_processor_count(); + size_t cpu_map_size = NCPUS / BitsPerCLong; + size_t cpu_map_valid_size = + MIN2((cpu_num + BitsPerCLong - 1) / BitsPerCLong, cpu_map_size); + cpu_to_node()->clear(); cpu_to_node()->at_grow(cpu_num - 1); - int node_num = numa_get_groups_num(); - int cpu_map_size = (cpu_num + BitsPerLong - 1) / BitsPerLong; + size_t node_num = numa_get_groups_num(); + unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size); - for (int i = 0; i < node_num; i++) { + for (size_t i = 0; i < node_num; i++) { if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) { - for (int j = 0; j < cpu_map_size; j++) { + for (size_t j = 0; j < cpu_map_valid_size; j++) { if (cpu_map[j] != 0) { - for (int k = 0; k < BitsPerLong; k++) { + for (size_t k = 0; k < BitsPerCLong; k++) { if (cpu_map[j] & (1UL << k)) { - cpu_to_node()->at_put(j * BitsPerLong + k, i); + cpu_to_node()->at_put(j * BitsPerCLong + k, i); } } } @@ -2377,7 +2399,8 @@ os::Linux::numa_max_node_func_t os::Linux::_numa_max_node; os::Linux::numa_available_func_t os::Linux::_numa_available; os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory; - +os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory; +unsigned long* os::Linux::_numa_all_nodes; bool os::uncommit_memory(char* addr, size_t size) { return ::mmap(addr, size, @@ -3695,7 +3718,17 @@ } if (UseNUMA) { - Linux::libnuma_init(); + if (!Linux::libnuma_init()) { + UseNUMA = false; + } else { + if ((Linux::numa_max_node() < 1)) { + // There's only one node(they start from 0), disable NUMA. + UseNUMA = false; + } + } + if (!UseNUMA && ForceNUMA) { + UseNUMA = true; + } } if (MaxFDLimit) { diff -r 5dddd195cc86 -r b46d9f19bde2 hotspot/src/os/linux/vm/os_linux.hpp --- a/hotspot/src/os/linux/vm/os_linux.hpp Mon Dec 01 23:25:24 2008 -0800 +++ b/hotspot/src/os/linux/vm/os_linux.hpp Wed Dec 03 14:18:57 2008 -0800 @@ -146,7 +146,7 @@ static bool is_floating_stack() { return _is_floating_stack; } static void libpthread_init(); - static void libnuma_init(); + static bool libnuma_init(); // Minimum stack size a thread can be created with (allowing // the VM to completely create the thread and enter user code) @@ -240,20 +240,23 @@ typedef int (*numa_max_node_func_t)(void); typedef int (*numa_available_func_t)(void); typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node); - + typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask); static sched_getcpu_func_t _sched_getcpu; static numa_node_to_cpus_func_t _numa_node_to_cpus; static numa_max_node_func_t _numa_max_node; static numa_available_func_t _numa_available; static numa_tonode_memory_func_t _numa_tonode_memory; + static numa_interleave_memory_func_t _numa_interleave_memory; + static unsigned long* _numa_all_nodes; static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; } static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; } static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; } static void set_numa_available(numa_available_func_t func) { _numa_available = func; } static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; } - + static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; } + static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; } public: static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; } static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) { @@ -264,6 +267,11 @@ static int numa_tonode_memory(void *start, size_t size, int node) { return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1; } + static void numa_interleave_memory(void *start, size_t size) { + if (_numa_interleave_memory != NULL && _numa_all_nodes != NULL) { + _numa_interleave_memory(start, size, _numa_all_nodes); + } + } static int get_node_by_cpu(int cpu_id); }; diff -r 5dddd195cc86 -r b46d9f19bde2 hotspot/src/os/solaris/vm/os_solaris.cpp --- a/hotspot/src/os/solaris/vm/os_solaris.cpp Mon Dec 01 23:25:24 2008 -0800 +++ b/hotspot/src/os/solaris/vm/os_solaris.cpp Wed Dec 03 14:18:57 2008 -0800 @@ -4638,7 +4638,7 @@ } } -void os::Solaris::liblgrp_init() { +bool os::Solaris::liblgrp_init() { void *handle = dlopen("liblgrp.so.1", RTLD_LAZY); if (handle != NULL) { os::Solaris::set_lgrp_home(CAST_TO_FN_PTR(lgrp_home_func_t, dlsym(handle, "lgrp_home"))); @@ -4653,9 +4653,9 @@ lgrp_cookie_t c = lgrp_init(LGRP_VIEW_CALLER); set_lgrp_cookie(c); - } else { - warning("your OS does not support NUMA"); - } + return true; + } + return false; } void os::Solaris::misc_sym_init() { @@ -4824,9 +4824,25 @@ vm_page_size())); Solaris::libthread_init(); + if (UseNUMA) { - Solaris::liblgrp_init(); - } + if (!Solaris::liblgrp_init()) { + UseNUMA = false; + } else { + size_t lgrp_limit = os::numa_get_groups_num(); + int *lgrp_ids = NEW_C_HEAP_ARRAY(int, lgrp_limit); + size_t lgrp_num = os::numa_get_leaf_groups(lgrp_ids, lgrp_limit); + FREE_C_HEAP_ARRAY(int, lgrp_ids); + if (lgrp_num < 2) { + // There's only one locality group, disable NUMA. + UseNUMA = false; + } + } + if (!UseNUMA && ForceNUMA) { + UseNUMA = true; + } + } + Solaris::misc_sym_init(); Solaris::signal_sets_init(); Solaris::init_signal_mem(); diff -r 5dddd195cc86 -r b46d9f19bde2 hotspot/src/os/solaris/vm/os_solaris.hpp --- a/hotspot/src/os/solaris/vm/os_solaris.hpp Mon Dec 01 23:25:24 2008 -0800 +++ b/hotspot/src/os/solaris/vm/os_solaris.hpp Wed Dec 03 14:18:57 2008 -0800 @@ -176,7 +176,7 @@ public: static void libthread_init(); static void synchronization_init(); - static void liblgrp_init(); + static bool liblgrp_init(); // Load miscellaneous symbols. static void misc_sym_init(); // This boolean allows users to forward their own non-matching signals diff -r 5dddd195cc86 -r b46d9f19bde2 hotspot/src/os/windows/vm/os_windows.cpp --- a/hotspot/src/os/windows/vm/os_windows.cpp Mon Dec 01 23:25:24 2008 -0800 +++ b/hotspot/src/os/windows/vm/os_windows.cpp Wed Dec 03 14:18:57 2008 -0800 @@ -3353,6 +3353,10 @@ // initialize thread priority policy prio_init(); + if (UseNUMA && !ForceNUMA) { + UseNUMA = false; // Currently unsupported. + } + return JNI_OK; } diff -r 5dddd195cc86 -r b46d9f19bde2 hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp --- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Mon Dec 01 23:25:24 2008 -0800 +++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Wed Dec 03 14:18:57 2008 -0800 @@ -414,9 +414,20 @@ if (limit > 0) { limit = round_down(limit, page_size()); if (chunk_size > current_chunk_size(i)) { - chunk_size = MIN2((off_t)chunk_size, (off_t)current_chunk_size(i) + (off_t)limit); + size_t upper_bound = pages_available * page_size(); + if (upper_bound > limit && + current_chunk_size(i) < upper_bound - limit) { + // The resulting upper bound should not exceed the available + // amount of memory (pages_available * page_size()). + upper_bound = current_chunk_size(i) + limit; + } + chunk_size = MIN2(chunk_size, upper_bound); } else { - chunk_size = MAX2((off_t)chunk_size, (off_t)current_chunk_size(i) - (off_t)limit); + size_t lower_bound = page_size(); + if (current_chunk_size(i) > limit) { // lower_bound shouldn't underflow. + lower_bound = current_chunk_size(i) - limit; + } + chunk_size = MAX2(chunk_size, lower_bound); } } assert(chunk_size <= pages_available * page_size(), "Chunk size out of range"); diff -r 5dddd195cc86 -r b46d9f19bde2 hotspot/src/share/vm/runtime/globals.hpp --- a/hotspot/src/share/vm/runtime/globals.hpp Mon Dec 01 23:25:24 2008 -0800 +++ b/hotspot/src/share/vm/runtime/globals.hpp Wed Dec 03 14:18:57 2008 -0800 @@ -342,6 +342,9 @@ product(bool, UseNUMA, false, \ "Use NUMA if available") \ \ + product(bool, ForceNUMA, false, \ + "Force NUMA optimizations on single-node/UMA systems") \ + \ product(intx, NUMAChunkResizeWeight, 20, \ "Percentage (0-100) used to weight the current sample when " \ "computing exponentially decaying average for " \