6684395: Port NUMA-aware allocator to linux
author iveresov
Tue, 29 Apr 2008 13:51:26 +0400
changeset 388 bcc631c5bbec
parent 387 6b17ecb32336
child 389 a44227868a4a
6684395: Port NUMA-aware allocator to linux
Summary: NUMA-aware allocator port to Linux
Reviewed-by: jmasa, apetrusenko
hotspot/build/linux/makefiles/mapfile-vers-debug
hotspot/build/linux/makefiles/mapfile-vers-product
hotspot/src/os/linux/vm/os_linux.cpp
hotspot/src/os/linux/vm/os_linux.hpp
hotspot/src/os/linux/vm/os_linux.inline.hpp
hotspot/src/os/solaris/vm/os_solaris.cpp
hotspot/src/os/solaris/vm/os_solaris.inline.hpp
hotspot/src/os/windows/vm/os_windows.cpp
hotspot/src/os/windows/vm/os_windows.inline.hpp
hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp
hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp
hotspot/src/share/vm/includeDB_core
hotspot/src/share/vm/runtime/os.hpp
--- a/hotspot/build/linux/makefiles/mapfile-vers-debug	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/build/linux/makefiles/mapfile-vers-debug	Tue Apr 29 13:51:26 2008 +0400
@@ -272,7 +272,9 @@
                 jio_snprintf;
                 jio_vfprintf;
                 jio_vsnprintf;
-		fork1;
+                fork1;
+                numa_warn;
+                numa_error;
 
                 # Needed because there is no JVM interface for this.
                 sysThreadAvailableStackWithSlack;
--- a/hotspot/build/linux/makefiles/mapfile-vers-product	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/build/linux/makefiles/mapfile-vers-product	Tue Apr 29 13:51:26 2008 +0400
@@ -267,7 +267,9 @@
                 jio_snprintf;
                 jio_vfprintf;
                 jio_vsnprintf;
-		fork1;
+                fork1;
+                numa_warn;
+                numa_error;
 
                 # Needed because there is no JVM interface for this.
                 sysThreadAvailableStackWithSlack;
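The two mapfile additions above exist because libnuma reports problems through numa_warn() and numa_error() and explicitly allows the embedding program to override them with its own definitions. Since the VM loads libnuma with dlopen(), the no-op overrides (defined in os_linux.cpp below) must appear in libjvm.so's dynamic symbol table, which is exactly what the export-map entries provide. A minimal sketch of the override pattern, using the hook signatures from the patch (the build line is an assumption, not part of the change):

// No-op replacements for libnuma's overridable error hooks. Exporting
// these from the shared object makes libnuma call them instead of
// printing warnings to stderr.
extern "C" void numa_warn(int number, char *where, ...) {
  // Swallow warnings: the VM degrades gracefully when a NUMA call fails.
  (void)number; (void)where;
}

extern "C" void numa_error(char *where) {
  // Likewise ignore hard errors reported by libnuma.
  (void)where;
}

// Assumed build line, for illustration only:
//   g++ -shared -fPIC -Wl,--version-script=mapfile-vers-debug hooks.cpp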
--- a/hotspot/src/os/linux/vm/os_linux.cpp	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/src/os/linux/vm/os_linux.cpp	Tue Apr 29 13:51:26 2008 +0400
@@ -2228,20 +2228,42 @@
 }
 
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
-void os::free_memory(char *addr, size_t bytes)         { }
+
+void os::free_memory(char *addr, size_t bytes) {
+  uncommit_memory(addr, bytes);
+}
+
 void os::numa_make_global(char *addr, size_t bytes)    { }
-void os::numa_make_local(char *addr, size_t bytes)     { }
-bool os::numa_topology_changed()                       { return false; }
-size_t os::numa_get_groups_num()                       { return 1; }
-int os::numa_get_group_id()                            { return 0; }
-size_t os::numa_get_leaf_groups(int *ids, size_t size) {
-  if (size > 0) {
-    ids[0] = 0;
-    return 1;
+
+void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
+  Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
+}
+
+bool os::numa_topology_changed()   { return false; }
+
+size_t os::numa_get_groups_num() {
+  int max_node = Linux::numa_max_node();
+  return max_node > 0 ? max_node + 1 : 1;
+}
+
+int os::numa_get_group_id() {
+  int cpu_id = Linux::sched_getcpu();
+  if (cpu_id != -1) {
+    int lgrp_id = Linux::get_node_by_cpu(cpu_id);
+    if (lgrp_id != -1) {
+      return lgrp_id;
+    }
   }
   return 0;
 }
 
+size_t os::numa_get_leaf_groups(int *ids, size_t size) {
+  for (size_t i = 0; i < size; i++) {
+    ids[i] = i;
+  }
+  return size;
+}
+
 bool os::get_page_info(char *start, page_info* info) {
   return false;
 }
@@ -2250,6 +2272,74 @@
   return end;
 }
 
+extern "C" void numa_warn(int number, char *where, ...) { }
+extern "C" void numa_error(char *where) { }
+
+void os::Linux::libnuma_init() {
+  // sched_getcpu() should be in libc.
+  set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
+                                  dlsym(RTLD_DEFAULT, "sched_getcpu")));
+
+  if (sched_getcpu() != -1) { // Does it work?
+    void *handle = dlopen("libnuma.so", RTLD_LAZY);
+    if (handle != NULL) {
+      set_numa_node_to_cpus(CAST_TO_FN_PTR(numa_node_to_cpus_func_t,
+                                           dlsym(handle, "numa_node_to_cpus")));
+      set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t,
+                                       dlsym(handle, "numa_max_node")));
+      set_numa_available(CAST_TO_FN_PTR(numa_available_func_t,
+                                        dlsym(handle, "numa_available")));
+      set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
+                                            dlsym(handle, "numa_tonode_memory")));
+      if (numa_available() != -1) {
+        // Create a cpu -> node mapping
+        _cpu_to_node = new (ResourceObj::C_HEAP) GrowableArray<int>(0, true);
+        rebuild_cpu_to_node_map();
+      }
+    }
+  }
+}
+
+// rebuild_cpu_to_node_map() constructs a table mapping cpu id to node id.
+// The table is later used in get_node_by_cpu().
+void os::Linux::rebuild_cpu_to_node_map() {
+  int cpu_num = os::active_processor_count();
+  cpu_to_node()->clear();
+  cpu_to_node()->at_grow(cpu_num - 1);
+  int node_num = numa_get_groups_num();
+  int cpu_map_size = (cpu_num + BitsPerLong - 1) / BitsPerLong;
+  unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size);
+  for (int i = 0; i < node_num; i++) {
+    if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
+      for (int j = 0; j < cpu_map_size; j++) {
+        if (cpu_map[j] != 0) {
+          for (int k = 0; k < BitsPerLong; k++) {
+            if (cpu_map[j] & (1UL << k)) {
+              cpu_to_node()->at_put(j * BitsPerLong + k, i);
+            }
+          }
+        }
+      }
+    }
+  }
+  FREE_C_HEAP_ARRAY(unsigned long, cpu_map);
+}
+
+int os::Linux::get_node_by_cpu(int cpu_id) {
+  if (cpu_to_node() != NULL && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
+    return cpu_to_node()->at(cpu_id);
+  }
+  return -1;
+}
+
+GrowableArray<int>* os::Linux::_cpu_to_node;
+os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
+os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
+os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
+os::Linux::numa_available_func_t os::Linux::_numa_available;
+os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
+
+
 bool os::uncommit_memory(char* addr, size_t size) {
   return ::mmap(addr, size,
                 PROT_READ|PROT_WRITE|PROT_EXEC,
@@ -3552,6 +3642,10 @@
           Linux::is_floating_stack() ? "floating stack" : "fixed stack");
   }
 
+  if (UseNUMA) {
+    Linux::libnuma_init();
+  }
+
   if (MaxFDLimit) {
     // set the number of file descriptors to max. print out error
     // if getrlimit/setrlimit fails but continue regardless.
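libnuma_init() binds everything at runtime: sched_getcpu() is looked up in the already-loaded images via RTLD_DEFAULT, libnuma.so is loaded lazily, and each entry point is resolved with dlsym(), so a machine without libnuma simply falls back to the single-node stubs. A self-contained sketch of the same probing pattern (not VM code; link with -ldl):

#include <dlfcn.h>
#include <cstdio>

typedef int (*int_fn_t)(void);

int main() {
  // sched_getcpu() should be in libc; probe the already-loaded images.
  int_fn_t getcpu = (int_fn_t)dlsym(RTLD_DEFAULT, "sched_getcpu");
  if (getcpu == NULL || getcpu() == -1) {
    printf("no usable sched_getcpu(); NUMA support stays off\n");
    return 0;
  }
  // A missing libnuma is not an error: the feature is simply disabled.
  void *handle = dlopen("libnuma.so", RTLD_LAZY);
  if (handle == NULL) {
    printf("libnuma not found; running as a single pseudo-node\n");
    return 0;
  }
  int_fn_t available = (int_fn_t)dlsym(handle, "numa_available");
  int_fn_t max_node  = (int_fn_t)dlsym(handle, "numa_max_node");
  if (available != NULL && max_node != NULL && available() != -1) {
    printf("running on cpu %d of a %d-node machine\n",
           getcpu(), max_node() + 1);
  }
  dlclose(handle);
  return 0;
}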
--- a/hotspot/src/os/linux/vm/os_linux.hpp	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/src/os/linux/vm/os_linux.hpp	Tue Apr 29 13:51:26 2008 +0400
@@ -59,6 +59,8 @@
   static bool _is_NPTL;
   static bool _supports_fast_thread_cpu_time;
 
+  static GrowableArray<int>* _cpu_to_node;
+
  protected:
 
   static julong _physical_memory;
@@ -79,8 +81,9 @@
   static void set_is_LinuxThreads()           { _is_NPTL = false; }
   static void set_is_floating_stack()         { _is_floating_stack = true; }
 
+  static void rebuild_cpu_to_node_map();
+  static GrowableArray<int>* cpu_to_node()    { return _cpu_to_node; }
  public:
-
   static void init_thread_fpu_state();
   static int  get_fpu_control_word();
   static void set_fpu_control_word(int fpu_control);
@@ -143,6 +146,7 @@
   static bool is_floating_stack()             { return _is_floating_stack; }
 
   static void libpthread_init();
+  static void libnuma_init();
 
   // Minimum stack size a thread can be created with (allowing
   // the VM to completely create the thread and enter user code)
@@ -229,6 +233,38 @@
 
     #undef SR_SUSPENDED
   };
+
+private:
+  typedef int (*sched_getcpu_func_t)(void);
+  typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
+  typedef int (*numa_max_node_func_t)(void);
+  typedef int (*numa_available_func_t)(void);
+  typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
+
+
+  static sched_getcpu_func_t _sched_getcpu;
+  static numa_node_to_cpus_func_t _numa_node_to_cpus;
+  static numa_max_node_func_t _numa_max_node;
+  static numa_available_func_t _numa_available;
+  static numa_tonode_memory_func_t _numa_tonode_memory;
+
+  static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
+  static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
+  static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
+  static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
+  static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
+
+public:
+  static int sched_getcpu()  { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
+  static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) {
+    return _numa_node_to_cpus != NULL ? _numa_node_to_cpus(node, buffer, bufferlen) : -1;
+  }
+  static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; }
+  static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; }
+  static int numa_tonode_memory(void *start, size_t size, int node) {
+    return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1;
+  }
+  static int get_node_by_cpu(int cpu_id);
 };
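rebuild_cpu_to_node_map() treats the buffer filled in by numa_node_to_cpus() as a bitmap: bit k of word j set means cpu j * BitsPerLong + k belongs to the queried node (bufferlen is in bytes in this libnuma API). A standalone sketch of that decoding, with a hardcoded example mask standing in for the libnuma call:

#include <cstdio>

int main() {
  const int BitsPerLong = 8 * (int)sizeof(unsigned long);
  // Hardcoded example mask (an assumption, for illustration only):
  // cpus 0-3 belong to the node being queried.
  unsigned long cpu_map[2] = { 0xFUL, 0UL };
  const int cpu_map_size = 2;  // words, i.e. bufferlen / sizeof(unsigned long)
  for (int j = 0; j < cpu_map_size; j++) {
    if (cpu_map[j] == 0) continue;        // skip empty words, as the patch does
    for (int k = 0; k < BitsPerLong; k++) {
      if (cpu_map[j] & (1UL << k)) {
        printf("cpu %d -> this node\n", j * BitsPerLong + k);
      }
    }
  }
  return 0;
}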
 
 
--- a/hotspot/src/os/linux/vm/os_linux.inline.hpp	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/src/os/linux/vm/os_linux.inline.hpp	Tue Apr 29 13:51:26 2008 +0400
@@ -120,3 +120,6 @@
   RESTARTABLE(_cmd, _result); \
   return _result; \
 } while(false)
+
+inline bool os::numa_has_static_binding()   { return true; }
+inline bool os::numa_has_group_homing()     { return false;  }
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp	Tue Apr 29 13:51:26 2008 +0400
@@ -2602,7 +2602,7 @@
 }
 
 // Tell the OS to make the range local to the first-touching LWP
-void os::numa_make_local(char *addr, size_t bytes) {
+void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
   assert((intptr_t)addr % os::vm_page_size() == 0, "Address should be page-aligned.");
   if (madvise(addr, bytes, MADV_ACCESS_LWP) < 0) {
     debug_only(warning("MADV_ACCESS_LWP failed."));
--- a/hotspot/src/os/solaris/vm/os_solaris.inline.hpp	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/src/os/solaris/vm/os_solaris.inline.hpp	Tue Apr 29 13:51:26 2008 +0400
@@ -204,3 +204,6 @@
   RESTARTABLE(_cmd, _result); \
   return _result; \
 } while(false)
+
+inline bool os::numa_has_static_binding()   { return false; }
+inline bool os::numa_has_group_homing()     { return true;  }
--- a/hotspot/src/os/windows/vm/os_windows.cpp	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/src/os/windows/vm/os_windows.cpp	Tue Apr 29 13:51:26 2008 +0400
@@ -2581,7 +2581,7 @@
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
 void os::free_memory(char *addr, size_t bytes)         { }
 void os::numa_make_global(char *addr, size_t bytes)    { }
-void os::numa_make_local(char *addr, size_t bytes)     { }
+void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint)    { }
 bool os::numa_topology_changed()                       { return false; }
 size_t os::numa_get_groups_num()                       { return 1; }
 int os::numa_get_group_id()                            { return 0; }
--- a/hotspot/src/os/windows/vm/os_windows.inline.hpp	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/src/os/windows/vm/os_windows.inline.hpp	Tue Apr 29 13:51:26 2008 +0400
@@ -69,3 +69,6 @@
     *((int *)(sp - (pages * vm_page_size()))) = 0;
   }
 }
+
+inline bool os::numa_has_static_binding()   { return true;   }
+inline bool os::numa_has_group_homing()     { return false;  }
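Taken together, the three os_*.inline.hpp files above encode a platform matrix: Linux and Windows report static binding and no group homing, Solaris the reverse. Shared code branches on the two predicates roughly as sketched below (a mock, not VM code; mock_os stands in for the real os class):

#include <cstddef>

namespace mock_os {  // stand-in for the VM's os class; Linux values shown
  bool numa_has_static_binding() { return true; }
  void numa_make_local(char*, size_t, int) { /* numa_tonode_memory() */ }
  void free_memory(char*, size_t)          { /* uncommit the pages   */ }
}

// Mirrors the bias_region()/free_region() choice in mutableNUMASpace.cpp.
void place_region(char *addr, size_t bytes, int lgrp_id) {
  if (mock_os::numa_has_static_binding()) {
    // Static binding (Linux/Windows): rebind pages to the lgrp whenever
    // the heap layout changes.
    mock_os::numa_make_local(addr, bytes, lgrp_id);
  } else {
    // First-touch system (Solaris): freeing the pages is enough; the
    // next thread to touch them gets them placed locally.
    mock_os::free_memory(addr, bytes);
  }
}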
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Tue Apr 29 13:51:26 2008 +0400
@@ -169,8 +169,9 @@
   size_t large_typearray_limit() { return FastAllocateSizeLimit; }
 
   bool supports_inline_contig_alloc() const { return !UseNUMA; }
-  HeapWord** top_addr() const { return !UseNUMA ? young_gen()->top_addr() : NULL; }
-  HeapWord** end_addr() const { return !UseNUMA ? young_gen()->end_addr() : NULL; }
+
+  HeapWord** top_addr() const { return !UseNUMA ? young_gen()->top_addr() : (HeapWord**)-1; }
+  HeapWord** end_addr() const { return !UseNUMA ? young_gen()->end_addr() : (HeapWord**)-1; }
 
   void ensure_parsability(bool retire_tlabs);
   void accumulate_statistics_all_tlabs();
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Tue Apr 29 13:51:26 2008 +0400
@@ -46,9 +46,11 @@
   for (int i = 0; i < lgrp_spaces()->length(); i++) {
     LGRPSpace *ls = lgrp_spaces()->at(i);
     MutableSpace *s = ls->space();
-    HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
-    if (top < s->end()) {
-      ls->add_invalid_region(MemRegion(top, s->end()));
+    if (!os::numa_has_static_binding()) {
+      HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
+      if (top < s->end()) {
+        ls->add_invalid_region(MemRegion(top, s->end()));
+      }
     }
     s->mangle_unused_area();
   }
@@ -70,32 +72,36 @@
                                     area_touched_words);
         }
 #endif
-        MemRegion invalid;
-        HeapWord *crossing_start = (HeapWord*)round_to((intptr_t)s->top(), os::vm_page_size());
-        HeapWord *crossing_end = (HeapWord*)round_to((intptr_t)(s->top() + area_touched_words),
-                                                     os::vm_page_size());
-        if (crossing_start != crossing_end) {
-          // If object header crossed a small page boundary we mark the area
-          // as invalid rounding it to a page_size().
-          HeapWord *start = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
-          HeapWord *end = MIN2((HeapWord*)round_to((intptr_t)(s->top() + area_touched_words), page_size()),
-                               s->end());
-          invalid = MemRegion(start, end);
+        if (!os::numa_has_static_binding()) {
+          MemRegion invalid;
+          HeapWord *crossing_start = (HeapWord*)round_to((intptr_t)s->top(), os::vm_page_size());
+          HeapWord *crossing_end = (HeapWord*)round_to((intptr_t)(s->top() + area_touched_words),
+                                                       os::vm_page_size());
+          if (crossing_start != crossing_end) {
+            // If the object header crossed a small page boundary, we mark
+            // the area as invalid, rounding it to page_size().
+            HeapWord *start = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
+            HeapWord *end = MIN2((HeapWord*)round_to((intptr_t)(s->top() + area_touched_words), page_size()),
+                                 s->end());
+            invalid = MemRegion(start, end);
+          }
+
+          ls->add_invalid_region(invalid);
         }
-
-        ls->add_invalid_region(invalid);
         s->set_top(s->end());
       }
     } else {
+      if (!os::numa_has_static_binding()) {
 #ifdef ASSERT
-      MemRegion invalid(s->top(), s->end());
-      ls->add_invalid_region(invalid);
-#else
-      if (ZapUnusedHeapArea) {
         MemRegion invalid(s->top(), s->end());
         ls->add_invalid_region(invalid);
-      } else break;
+#else
+        if (ZapUnusedHeapArea) {
+          MemRegion invalid(s->top(), s->end());
+          ls->add_invalid_region(invalid);
+        } else break;
 #endif
+      }
     }
   }
 }
@@ -194,7 +200,7 @@
 }
 
 // Bias region towards the first-touching lgrp. Set the right page sizes.
-void MutableNUMASpace::bias_region(MemRegion mr) {
+void MutableNUMASpace::bias_region(MemRegion mr, int lgrp_id) {
   HeapWord *start = (HeapWord*)round_to((intptr_t)mr.start(), page_size());
   HeapWord *end = (HeapWord*)round_down((intptr_t)mr.end(), page_size());
   if (end > start) {
@@ -202,9 +208,13 @@
     assert((intptr_t)aligned_region.start()     % page_size() == 0 &&
            (intptr_t)aligned_region.byte_size() % page_size() == 0, "Bad alignment");
     assert(region().contains(aligned_region), "Sanity");
-    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
+    // First we tell the OS which page size we want in the given range. The underlying
+    // large page can be broken down if we require small pages.
     os::realign_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
-    os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size());
+    // Then we uncommit the pages in the range.
+    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
+    // And make them local/first-touch biased.
+    os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size(), lgrp_id);
   }
 }
 
@@ -233,10 +243,12 @@
     initialize(region(), true);
   } else {
     bool should_initialize = false;
-    for (int i = 0; i < lgrp_spaces()->length(); i++) {
-      if (!lgrp_spaces()->at(i)->invalid_region().is_empty()) {
-        should_initialize = true;
-        break;
+    if (!os::numa_has_static_binding()) {
+      for (int i = 0; i < lgrp_spaces()->length(); i++) {
+        if (!lgrp_spaces()->at(i)->invalid_region().is_empty()) {
+          should_initialize = true;
+          break;
+        }
       }
     }
 
@@ -472,8 +484,8 @@
       intersection = MemRegion(new_region.start(), new_region.start());
     }
     select_tails(new_region, intersection, &bottom_region, &top_region);
-    bias_region(bottom_region);
-    bias_region(top_region);
+    bias_region(bottom_region, lgrp_spaces()->at(0)->lgrp_id());
+    bias_region(top_region, lgrp_spaces()->at(lgrp_spaces()->length() - 1)->lgrp_id());
   }
 
   // Check if the space layout has changed significantly?
@@ -545,22 +557,37 @@
       intersection = MemRegion(new_region.start(), new_region.start());
     }
 
-    MemRegion invalid_region = ls->invalid_region().intersection(new_region);
-    if (!invalid_region.is_empty()) {
-      merge_regions(new_region, &intersection, &invalid_region);
-      free_region(invalid_region);
+    if (!os::numa_has_static_binding()) {
+      MemRegion invalid_region = ls->invalid_region().intersection(new_region);
+      // An invalid region is a range of memory that could have been
+      // allocated on another node. This is relevant only on systems like
+      // Solaris where there is no static memory binding.
+      if (!invalid_region.is_empty()) {
+        merge_regions(new_region, &intersection, &invalid_region);
+        free_region(invalid_region);
+        ls->set_invalid_region(MemRegion());
+      }
     }
+
     select_tails(new_region, intersection, &bottom_region, &top_region);
-    free_region(bottom_region);
-    free_region(top_region);
+
+    if (!os::numa_has_static_binding()) {
+      // On a system with a first-touch policy it is enough to free
+      // the pages.
+      free_region(bottom_region);
+      free_region(top_region);
+    } else {
+      // In a system with static binding we have to change the bias whenever
+      // we reshape the heap.
+      bias_region(bottom_region, ls->lgrp_id());
+      bias_region(top_region, ls->lgrp_id());
+    }
 
     // If we clear the region, we would mangle it in debug. That would cause page
     // allocation in a different place. Hence setting the top directly.
     s->initialize(new_region, false);
     s->set_top(s->bottom());
 
-    ls->set_invalid_region(MemRegion());
-
     set_adaptation_cycles(samples_count());
   }
 }
@@ -575,7 +602,7 @@
     HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
 
     if (s->contains(value)) {
-      if (top < value && top < s->end()) {
+      if (!os::numa_has_static_binding() && top < value && top < s->end()) {
         ls->add_invalid_region(MemRegion(top, value));
       }
       s->set_top(value);
@@ -584,10 +611,10 @@
         if (found_top) {
             s->set_top(s->bottom());
         } else {
-            if (top < s->end()) {
-              ls->add_invalid_region(MemRegion(top, s->end()));
-            }
-            s->set_top(s->end());
+          if (!os::numa_has_static_binding() && top < s->end()) {
+            ls->add_invalid_region(MemRegion(top, s->end()));
+          }
+          s->set_top(s->end());
         }
     }
   }
@@ -601,11 +628,23 @@
   }
 }
 
+/*
+   Linux supports static memory binding, so most of the logic that
+   deals with potentially invalid page allocations is effectively
+   disabled. There is also no notion of a home node on Linux: a thread
+   may migrate freely, although the scheduler is rather reluctant to
+   move threads between nodes. We therefore check for the current node
+   on every allocation. With high probability a thread stays on the
+   same node for some time, allowing local access to recently
+   allocated objects.
+ */
+
 HeapWord* MutableNUMASpace::allocate(size_t size) {
-  int lgrp_id = Thread::current()->lgrp_id();
-  if (lgrp_id == -1) {
+  Thread* thr = Thread::current();
+  int lgrp_id = thr->lgrp_id();
+  if (lgrp_id == -1 || !os::numa_has_group_homing()) {
     lgrp_id = os::numa_get_group_id();
-    Thread::current()->set_lgrp_id(lgrp_id);
+    thr->set_lgrp_id(lgrp_id);
   }
 
   int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
@@ -628,22 +667,22 @@
       MutableSpace::set_top(s->top());
     }
   }
-  // Make the page allocation happen here.
-  if (p != NULL) {
+  // Make the page allocation happen here if there is no static binding.
+  if (p != NULL && !os::numa_has_static_binding()) {
     for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
       *(int*)i = 0;
     }
   }
-
   return p;
 }
 
 // This version is lock-free.
 HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
-  int lgrp_id = Thread::current()->lgrp_id();
-  if (lgrp_id == -1) {
+  Thread* thr = Thread::current();
+  int lgrp_id = thr->lgrp_id();
+  if (lgrp_id == -1 || !os::numa_has_group_homing()) {
     lgrp_id = os::numa_get_group_id();
-    Thread::current()->set_lgrp_id(lgrp_id);
+    thr->set_lgrp_id(lgrp_id);
   }
 
   int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
@@ -670,8 +709,8 @@
     }
   }
 
-  // Make the page allocation happen here.
-  if (p != NULL) {
+  // Make the page allocation happen here if there is no static binding.
+  if (p != NULL && !os::numa_has_static_binding()) {
     for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
       *(int*)i = 0;
     }
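When there is no static binding, allocate() and cas_allocate() force page placement by writing one word per page of the newly carved-out block, so the first-touch policy materializes the pages on the allocating thread's node. The same loop in isolation (page size obtained via sysconf() here, an assumption; the VM uses os::vm_page_size()):

#include <cstdlib>
#include <unistd.h>

// Touch one word per page so each page is physically allocated on the
// node of the calling thread (the kernel's first-touch policy).
void first_touch(char *p, size_t bytes) {
  const size_t page = (size_t)sysconf(_SC_PAGESIZE);
  for (char *i = p; i < p + bytes; i += page) {
    *(volatile int*)i = 0;
  }
}

int main() {
  void *mem = NULL;
  if (posix_memalign(&mem, 4096, 1 << 20) == 0) {  // 1 MB, page-aligned
    first_touch((char*)mem, 1 << 20);
    free(mem);
  }
  return 0;
}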
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Tue Apr 29 13:51:26 2008 +0400
@@ -139,8 +139,8 @@
   // Check if the NUMA topology has changed. Add and remove spaces if needed.
   // The update can be forced by setting the force parameter equal to true.
   bool update_layout(bool force);
-  // Bias region towards the first-touching lgrp.
-  void bias_region(MemRegion mr);
+  // Bias region towards the lgrp.
+  void bias_region(MemRegion mr, int lgrp_id);
   // Free pages in a given region.
   void free_region(MemRegion mr);
   // Get current chunk size.
--- a/hotspot/src/share/vm/includeDB_core	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/src/share/vm/includeDB_core	Tue Apr 29 13:51:26 2008 +0400
@@ -3181,6 +3181,7 @@
 os_<os_family>.cpp                      extendedPC.hpp
 os_<os_family>.cpp                      filemap.hpp
 os_<os_family>.cpp                      globals.hpp
+os_<os_family>.cpp                      growableArray.hpp
 os_<os_family>.cpp                      hpi.hpp
 os_<os_family>.cpp                      icBuffer.hpp
 os_<os_family>.cpp                      interfaceSupport.hpp
--- a/hotspot/src/share/vm/runtime/os.hpp	Wed Apr 16 12:58:03 2008 +0400
+++ b/hotspot/src/share/vm/runtime/os.hpp	Tue Apr 29 13:51:26 2008 +0400
@@ -33,6 +33,7 @@
 class Event;
 class DLL;
 class FileHandle;
+template<class E> class GrowableArray;
 
 // %%%%% Moved ThreadState, START_FN, OSThread to new osThread.hpp. -- Rose
 
@@ -206,7 +207,9 @@
   static void   realign_memory(char *addr, size_t bytes, size_t alignment_hint);
 
   // NUMA-specific interface
-  static void   numa_make_local(char *addr, size_t bytes);
+  static bool   numa_has_static_binding();
+  static bool   numa_has_group_homing();
+  static void   numa_make_local(char *addr, size_t bytes, int lgrp_hint);
   static void   numa_make_global(char *addr, size_t bytes);
   static size_t numa_get_groups_num();
   static size_t numa_get_leaf_groups(int *ids, size_t size);
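For reference, a client enumerates locality groups through the interface above by sizing an id array with numa_get_groups_num() and filling it with numa_get_leaf_groups(). A hypothetical sketch with mock implementations mirroring the Linux defaults from this patch (group count = numa_max_node() + 1, leaf ids 0..n-1):

#include <cstdio>
#include <cstddef>

// Mock implementations, for illustration only; the real ones live in os.
static size_t numa_get_groups_num() { return 2; }     // numa_max_node() + 1
static size_t numa_get_leaf_groups(int *ids, size_t size) {
  for (size_t i = 0; i < size; i++) ids[i] = (int)i;  // leaf ids are 0..n-1
  return size;
}

int main() {
  size_t n = numa_get_groups_num();
  int ids[16];
  size_t found = numa_get_leaf_groups(ids, n < 16 ? n : 16);
  for (size_t i = 0; i < found; i++) {
    printf("leaf lgrp id: %d\n", ids[i]);
  }
  return 0;
}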