8198794: Hotspot crash on Cassandra 3.11.1 startup with libnuma 2.0.3
author gromero
Thu, 15 Mar 2018 20:52:29 -0400
changeset 49410 7fb0ad2d8749
parent 49409 66ba2092464c
child 49411 7a656b77a2d8
8198794: Hotspot crash on Cassandra 3.11.1 startup with libnuma 2.0.3
Reviewed-by: dholmes, phh
src/hotspot/os/linux/os_linux.hpp
--- a/src/hotspot/os/linux/os_linux.hpp	Wed Mar 14 11:09:26 2018 -0700
+++ b/src/hotspot/os/linux/os_linux.hpp	Thu Mar 15 20:52:29 2018 -0400
@@ -260,8 +260,8 @@
   static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; }
   static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; }
   static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
-  static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = *ptr; }
-  static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = *ptr; }
+  static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
+  static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
   static int sched_getcpu_syscall(void);
  public:
   static int sched_getcpu()  { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
@@ -305,6 +305,18 @@
   static bool isnode_in_existing_nodes(unsigned int n) {
     if (_numa_bitmask_isbitset != NULL && _numa_nodes_ptr != NULL) {
       return _numa_bitmask_isbitset(_numa_nodes_ptr, n);
+    } else if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) {
+      // Not all libnuma API v2 implement numa_nodes_ptr, so it's not possible
+      // to trust the API version for checking its absence. On the other hand,
+      // numa_nodes_ptr found in libnuma 2.0.9 and above is the only way to get
+      // a complete view of all numa nodes in the system, hence numa_nodes_ptr
+      // is used to handle CPU and nodes on architectures (like PowerPC) where
+      // there can exist nodes with CPUs but no memory or vice-versa and the
+      // nodes may be non-contiguous. For most of the architectures, like
+      // x86_64, numa_nodes_ptr presents the same node set as found in
+      // numa_all_nodes_ptr so it's possible to use numa_all_nodes_ptr as a
+      // substitute.
+      return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
     } else
       return 0;
   }