--- a/hotspot/make/linux/makefiles/mapfile-vers-debug Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/make/linux/makefiles/mapfile-vers-debug Sun May 04 03:29:31 2008 -0700
@@ -279,7 +279,9 @@
jio_snprintf;
jio_vfprintf;
jio_vsnprintf;
- fork1;
+ fork1;
+ numa_warn;
+ numa_error;
# Needed because there is no JVM interface for this.
sysThreadAvailableStackWithSlack;
--- a/hotspot/make/linux/makefiles/mapfile-vers-product Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/make/linux/makefiles/mapfile-vers-product Sun May 04 03:29:31 2008 -0700
@@ -274,7 +274,9 @@
jio_snprintf;
jio_vfprintf;
jio_vsnprintf;
- fork1;
+ fork1;
+ numa_warn;
+ numa_error;
# Needed because there is no JVM interface for this.
sysThreadAvailableStackWithSlack;
--- a/hotspot/src/os/linux/vm/os_linux.cpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/os/linux/vm/os_linux.cpp Sun May 04 03:29:31 2008 -0700
@@ -2228,20 +2228,42 @@
}
void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
-void os::free_memory(char *addr, size_t bytes) { }
+
+void os::free_memory(char *addr, size_t bytes) {
+ uncommit_memory(addr, bytes);
+}
+
void os::numa_make_global(char *addr, size_t bytes) { }
-void os::numa_make_local(char *addr, size_t bytes) { }
-bool os::numa_topology_changed() { return false; }
-size_t os::numa_get_groups_num() { return 1; }
-int os::numa_get_group_id() { return 0; }
-size_t os::numa_get_leaf_groups(int *ids, size_t size) {
- if (size > 0) {
- ids[0] = 0;
- return 1;
+
+void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
+ Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
+}
+
+bool os::numa_topology_changed() { return false; }
+
+size_t os::numa_get_groups_num() {
+ int max_node = Linux::numa_max_node();
+ return max_node > 0 ? max_node + 1 : 1;
+}
+
+int os::numa_get_group_id() {
+ int cpu_id = Linux::sched_getcpu();
+ if (cpu_id != -1) {
+ int lgrp_id = Linux::get_node_by_cpu(cpu_id);
+ if (lgrp_id != -1) {
+ return lgrp_id;
+ }
}
return 0;
}
+size_t os::numa_get_leaf_groups(int *ids, size_t size) {
+ for (size_t i = 0; i < size; i++) {
+ ids[i] = i;
+ }
+ return size;
+}
+
bool os::get_page_info(char *start, page_info* info) {
return false;
}
@@ -2250,6 +2272,74 @@
return end;
}
+extern "C" void numa_warn(int number, char *where, ...) { }
+extern "C" void numa_error(char *where) { }
+
+void os::Linux::libnuma_init() {
+ // sched_getcpu() should be in libc.
+ set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
+ dlsym(RTLD_DEFAULT, "sched_getcpu")));
+
+ if (sched_getcpu() != -1) { // Does it work?
+ void *handle = dlopen("libnuma.so", RTLD_LAZY);
+ if (handle != NULL) {
+ set_numa_node_to_cpus(CAST_TO_FN_PTR(numa_node_to_cpus_func_t,
+ dlsym(handle, "numa_node_to_cpus")));
+ set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t,
+ dlsym(handle, "numa_max_node")));
+ set_numa_available(CAST_TO_FN_PTR(numa_available_func_t,
+ dlsym(handle, "numa_available")));
+ set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
+ dlsym(handle, "numa_tonode_memory")));
+ if (numa_available() != -1) {
+ // Create a cpu -> node mapping
+ _cpu_to_node = new (ResourceObj::C_HEAP) GrowableArray<int>(0, true);
+ rebuild_cpu_to_node_map();
+ }
+ }
+ }
+}
+
+// rebuild_cpu_to_node_map() constructs a table mapping cpu id to node id.
+// The table is later used in get_node_by_cpu().
+void os::Linux::rebuild_cpu_to_node_map() {
+ int cpu_num = os::active_processor_count();
+ cpu_to_node()->clear();
+ cpu_to_node()->at_grow(cpu_num - 1);
+ int node_num = numa_get_groups_num();
+ int cpu_map_size = (cpu_num + BitsPerLong - 1) / BitsPerLong;
+ unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size);
+ for (int i = 0; i < node_num; i++) {
+ if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
+ for (int j = 0; j < cpu_map_size; j++) {
+ if (cpu_map[j] != 0) {
+ for (int k = 0; k < BitsPerLong; k++) {
+ if (cpu_map[j] & (1UL << k)) {
+ cpu_to_node()->at_put(j * BitsPerLong + k, i);
+ }
+ }
+ }
+ }
+ }
+ }
+ FREE_C_HEAP_ARRAY(unsigned long, cpu_map);
+}
+
+int os::Linux::get_node_by_cpu(int cpu_id) {
+ if (cpu_to_node() != NULL && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
+ return cpu_to_node()->at(cpu_id);
+ }
+ return -1;
+}
+
+GrowableArray<int>* os::Linux::_cpu_to_node;
+os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
+os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
+os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
+os::Linux::numa_available_func_t os::Linux::_numa_available;
+os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
+
+
bool os::uncommit_memory(char* addr, size_t size) {
return ::mmap(addr, size,
PROT_READ|PROT_WRITE|PROT_EXEC,
@@ -3552,6 +3642,10 @@
Linux::is_floating_stack() ? "floating stack" : "fixed stack");
}
+ if (UseNUMA) {
+ Linux::libnuma_init();
+ }
+
if (MaxFDLimit) {
// set the number of file descriptors to max. print out error
// if getrlimit/setrlimit fails but continue regardless.
--- a/hotspot/src/os/linux/vm/os_linux.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/os/linux/vm/os_linux.hpp Sun May 04 03:29:31 2008 -0700
@@ -59,6 +59,8 @@
static bool _is_NPTL;
static bool _supports_fast_thread_cpu_time;
+ static GrowableArray<int>* _cpu_to_node;
+
protected:
static julong _physical_memory;
@@ -79,8 +81,9 @@
static void set_is_LinuxThreads() { _is_NPTL = false; }
static void set_is_floating_stack() { _is_floating_stack = true; }
+ static void rebuild_cpu_to_node_map();
+ static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; }
public:
-
static void init_thread_fpu_state();
static int get_fpu_control_word();
static void set_fpu_control_word(int fpu_control);
@@ -143,6 +146,7 @@
static bool is_floating_stack() { return _is_floating_stack; }
static void libpthread_init();
+ static void libnuma_init();
// Minimum stack size a thread can be created with (allowing
// the VM to completely create the thread and enter user code)
@@ -229,6 +233,38 @@
#undef SR_SUSPENDED
};
+
+private:
+ typedef int (*sched_getcpu_func_t)(void);
+ typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
+ typedef int (*numa_max_node_func_t)(void);
+ typedef int (*numa_available_func_t)(void);
+ typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
+
+
+ static sched_getcpu_func_t _sched_getcpu;
+ static numa_node_to_cpus_func_t _numa_node_to_cpus;
+ static numa_max_node_func_t _numa_max_node;
+ static numa_available_func_t _numa_available;
+ static numa_tonode_memory_func_t _numa_tonode_memory;
+
+ static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
+ static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
+ static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
+ static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
+ static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
+
+public:
+ static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
+ static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) {
+ return _numa_node_to_cpus != NULL ? _numa_node_to_cpus(node, buffer, bufferlen) : -1;
+ }
+ static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; }
+ static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; }
+ static int numa_tonode_memory(void *start, size_t size, int node) {
+ return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1;
+ }
+ static int get_node_by_cpu(int cpu_id);
};
--- a/hotspot/src/os/linux/vm/os_linux.inline.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/os/linux/vm/os_linux.inline.hpp Sun May 04 03:29:31 2008 -0700
@@ -120,3 +120,6 @@
RESTARTABLE(_cmd, _result); \
return _result; \
} while(false)
+
+inline bool os::numa_has_static_binding() { return true; }
+inline bool os::numa_has_group_homing() { return false; }
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp Sun May 04 03:29:31 2008 -0700
@@ -2602,7 +2602,7 @@
}
// Tell the OS to make the range local to the first-touching LWP
-void os::numa_make_local(char *addr, size_t bytes) {
+void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
assert((intptr_t)addr % os::vm_page_size() == 0, "Address should be page-aligned.");
if (madvise(addr, bytes, MADV_ACCESS_LWP) < 0) {
debug_only(warning("MADV_ACCESS_LWP failed."));
--- a/hotspot/src/os/solaris/vm/os_solaris.inline.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/os/solaris/vm/os_solaris.inline.hpp Sun May 04 03:29:31 2008 -0700
@@ -204,3 +204,6 @@
RESTARTABLE(_cmd, _result); \
return _result; \
} while(false)
+
+inline bool os::numa_has_static_binding() { return false; }
+inline bool os::numa_has_group_homing() { return true; }
--- a/hotspot/src/os/windows/vm/os_windows.cpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/os/windows/vm/os_windows.cpp Sun May 04 03:29:31 2008 -0700
@@ -2581,7 +2581,7 @@
void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
void os::free_memory(char *addr, size_t bytes) { }
void os::numa_make_global(char *addr, size_t bytes) { }
-void os::numa_make_local(char *addr, size_t bytes) { }
+void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) { }
bool os::numa_topology_changed() { return false; }
size_t os::numa_get_groups_num() { return 1; }
int os::numa_get_group_id() { return 0; }
--- a/hotspot/src/os/windows/vm/os_windows.inline.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/os/windows/vm/os_windows.inline.hpp Sun May 04 03:29:31 2008 -0700
@@ -69,3 +69,6 @@
*((int *)(sp - (pages * vm_page_size()))) = 0;
}
}
+
+inline bool os::numa_has_static_binding() { return true; }
+inline bool os::numa_has_group_homing() { return false; }
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.cpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.cpp Sun May 04 03:29:31 2008 -0700
@@ -44,52 +44,12 @@
bool lock_owned = lock->owned_by_self();
if (lock_owned) {
MutexUnlocker mul(lock);
- return mem_allocate_work(size);
+ return mem_allocate_in_gen(size, _gen);
} else {
- return mem_allocate_work(size);
+ return mem_allocate_in_gen(size, _gen);
}
}
-HeapWord* CMSPermGen::mem_allocate_work(size_t size) {
- assert(!_gen->freelistLock()->owned_by_self(), "Potetntial deadlock");
-
- MutexLocker ml(Heap_lock);
- HeapWord* obj = NULL;
-
- obj = _gen->allocate(size, false);
- // Since we want to minimize pause times, we will prefer
- // expanding the perm gen rather than doing a stop-world
- // collection to satisfy the allocation request.
- if (obj == NULL) {
- // Try to expand the perm gen and allocate space.
- obj = _gen->expand_and_allocate(size, false, false);
- if (obj == NULL) {
- // Let's see if a normal stop-world full collection will
- // free up enough space.
- SharedHeap::heap()->collect_locked(GCCause::_permanent_generation_full);
- obj = _gen->allocate(size, false);
- if (obj == NULL) {
- // The collection above may have shrunk the space, so try
- // to expand again and allocate space.
- obj = _gen->expand_and_allocate(size, false, false);
- }
- if (obj == NULL) {
- // We have not been able to allocate space despite a
- // full stop-world collection. We now make a last-ditch collection
- // attempt (in which soft refs are all aggressively freed)
- // that will try to reclaim as much space as possible.
- SharedHeap::heap()->collect_locked(GCCause::_last_ditch_collection);
- obj = _gen->allocate(size, false);
- if (obj == NULL) {
- // Expand generation in case it was shrunk following the collection.
- obj = _gen->expand_and_allocate(size, false, false);
- }
- }
- }
- }
- return obj;
-}
-
void CMSPermGen::compute_new_size() {
_gen->compute_new_size();
}
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.hpp Sun May 04 03:29:31 2008 -0700
@@ -29,7 +29,6 @@
class CMSPermGen: public PermGen {
friend class VMStructs;
- HeapWord* mem_allocate_work(size_t size);
protected:
// The "generation" view.
ConcurrentMarkSweepGeneration* _gen;
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Sun May 04 03:29:31 2008 -0700
@@ -590,6 +590,31 @@
full_gc_count = Universe::heap()->total_full_collections();
result = perm_gen()->allocate_permanent(size);
+
+ if (result != NULL) {
+ return result;
+ }
+
+ if (GC_locker::is_active_and_needs_gc()) {
+ // If this thread is not in a jni critical section, we stall
+ // the requestor until the critical section has cleared and
+ // GC allowed. When the critical section clears, a GC is
+ // initiated by the last thread exiting the critical section; so
+ // we retry the allocation sequence from the beginning of the loop,
+ // rather than causing more, now probably unnecessary, GC attempts.
+ JavaThread* jthr = JavaThread::current();
+ if (!jthr->in_critical()) {
+ MutexUnlocker mul(Heap_lock);
+ GC_locker::stall_until_clear();
+ continue;
+ } else {
+ if (CheckJNICalls) {
+ fatal("Possible deadlock due to allocating while"
+ " in jni critical section");
+ }
+ return NULL;
+ }
+ }
}
if (result == NULL) {
@@ -622,6 +647,12 @@
if (op.prologue_succeeded()) {
assert(Universe::heap()->is_in_permanent_or_null(op.result()),
"result not in heap");
+ // If GC was locked out during VM operation then retry allocation
+ // and/or stall as necessary.
+ if (op.gc_locked()) {
+ assert(op.result() == NULL, "must be NULL if gc_locked() is true");
+ continue; // retry and/or stall as necessary
+ }
// If a NULL results is being returned, an out-of-memory
// will be thrown now. Clear the gc_time_limit_exceeded
// flag to avoid the following situation.
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Sun May 04 03:29:31 2008 -0700
@@ -169,8 +169,9 @@
size_t large_typearray_limit() { return FastAllocateSizeLimit; }
bool supports_inline_contig_alloc() const { return !UseNUMA; }
- HeapWord** top_addr() const { return !UseNUMA ? young_gen()->top_addr() : NULL; }
- HeapWord** end_addr() const { return !UseNUMA ? young_gen()->end_addr() : NULL; }
+
+ HeapWord** top_addr() const { return !UseNUMA ? young_gen()->top_addr() : (HeapWord**)-1; }
+ HeapWord** end_addr() const { return !UseNUMA ? young_gen()->end_addr() : (HeapWord**)-1; }
void ensure_parsability(bool retire_tlabs);
void accumulate_statistics_all_tlabs();
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Sun May 04 03:29:31 2008 -0700
@@ -976,7 +976,7 @@
DEBUG_ONLY(mark_bitmap_count = mark_bitmap_size = 0;)
// Increment the invocation count
- heap->increment_total_collections();
+ heap->increment_total_collections(true);
// We need to track unique mark sweep invocations as well.
_total_invocations++;
@@ -1941,7 +1941,7 @@
assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
assert(ref_processor() != NULL, "Sanity");
- if (GC_locker::is_active()) {
+ if (GC_locker::check_active_before_gc()) {
return;
}
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp Sun May 04 03:29:31 2008 -0700
@@ -69,6 +69,9 @@
GCCauseSetter gccs(heap, _gc_cause);
_result = heap->failed_permanent_mem_allocate(_size);
+ if (_result == NULL && GC_locker::is_active_and_needs_gc()) {
+ set_gc_locked();
+ }
notify_gc_end();
}
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Sun May 04 03:29:31 2008 -0700
@@ -46,9 +46,11 @@
for (int i = 0; i < lgrp_spaces()->length(); i++) {
LGRPSpace *ls = lgrp_spaces()->at(i);
MutableSpace *s = ls->space();
- HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
- if (top < s->end()) {
- ls->add_invalid_region(MemRegion(top, s->end()));
+ if (!os::numa_has_static_binding()) {
+ HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
+ if (top < s->end()) {
+ ls->add_invalid_region(MemRegion(top, s->end()));
+ }
}
s->mangle_unused_area();
}
@@ -70,32 +72,36 @@
area_touched_words);
}
#endif
- MemRegion invalid;
- HeapWord *crossing_start = (HeapWord*)round_to((intptr_t)s->top(), os::vm_page_size());
- HeapWord *crossing_end = (HeapWord*)round_to((intptr_t)(s->top() + area_touched_words),
- os::vm_page_size());
- if (crossing_start != crossing_end) {
- // If object header crossed a small page boundary we mark the area
- // as invalid rounding it to a page_size().
- HeapWord *start = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
- HeapWord *end = MIN2((HeapWord*)round_to((intptr_t)(s->top() + area_touched_words), page_size()),
- s->end());
- invalid = MemRegion(start, end);
+ if (!os::numa_has_static_binding()) {
+ MemRegion invalid;
+ HeapWord *crossing_start = (HeapWord*)round_to((intptr_t)s->top(), os::vm_page_size());
+ HeapWord *crossing_end = (HeapWord*)round_to((intptr_t)(s->top() + area_touched_words),
+ os::vm_page_size());
+ if (crossing_start != crossing_end) {
+ // If object header crossed a small page boundary we mark the area
+ // as invalid rounding it to a page_size().
+ HeapWord *start = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
+ HeapWord *end = MIN2((HeapWord*)round_to((intptr_t)(s->top() + area_touched_words), page_size()),
+ s->end());
+ invalid = MemRegion(start, end);
+ }
+
+ ls->add_invalid_region(invalid);
}
-
- ls->add_invalid_region(invalid);
s->set_top(s->end());
}
} else {
+ if (!os::numa_has_static_binding()) {
#ifdef ASSERT
- MemRegion invalid(s->top(), s->end());
- ls->add_invalid_region(invalid);
-#else
- if (ZapUnusedHeapArea) {
MemRegion invalid(s->top(), s->end());
ls->add_invalid_region(invalid);
- } else break;
+#else
+ if (ZapUnusedHeapArea) {
+ MemRegion invalid(s->top(), s->end());
+ ls->add_invalid_region(invalid);
+ } else break;
#endif
+ }
}
}
}
@@ -194,7 +200,7 @@
}
// Bias region towards the first-touching lgrp. Set the right page sizes.
-void MutableNUMASpace::bias_region(MemRegion mr) {
+void MutableNUMASpace::bias_region(MemRegion mr, int lgrp_id) {
HeapWord *start = (HeapWord*)round_to((intptr_t)mr.start(), page_size());
HeapWord *end = (HeapWord*)round_down((intptr_t)mr.end(), page_size());
if (end > start) {
@@ -202,9 +208,13 @@
assert((intptr_t)aligned_region.start() % page_size() == 0 &&
(intptr_t)aligned_region.byte_size() % page_size() == 0, "Bad alignment");
assert(region().contains(aligned_region), "Sanity");
- os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
+ // First we tell the OS which page size we want in the given range. The underlying
+ // large page can be broken down if we require small pages.
os::realign_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
- os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size());
+ // Then we uncommit the pages in the range.
+ os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
+ // And make them local/first-touch biased.
+ os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size(), lgrp_id);
}
}
@@ -233,10 +243,12 @@
initialize(region(), true);
} else {
bool should_initialize = false;
- for (int i = 0; i < lgrp_spaces()->length(); i++) {
- if (!lgrp_spaces()->at(i)->invalid_region().is_empty()) {
- should_initialize = true;
- break;
+ if (!os::numa_has_static_binding()) {
+ for (int i = 0; i < lgrp_spaces()->length(); i++) {
+ if (!lgrp_spaces()->at(i)->invalid_region().is_empty()) {
+ should_initialize = true;
+ break;
+ }
}
}
@@ -472,8 +484,8 @@
intersection = MemRegion(new_region.start(), new_region.start());
}
select_tails(new_region, intersection, &bottom_region, &top_region);
- bias_region(bottom_region);
- bias_region(top_region);
+ bias_region(bottom_region, lgrp_spaces()->at(0)->lgrp_id());
+ bias_region(top_region, lgrp_spaces()->at(lgrp_spaces()->length() - 1)->lgrp_id());
}
// Check if the space layout has changed significantly?
@@ -545,22 +557,37 @@
intersection = MemRegion(new_region.start(), new_region.start());
}
- MemRegion invalid_region = ls->invalid_region().intersection(new_region);
- if (!invalid_region.is_empty()) {
- merge_regions(new_region, &intersection, &invalid_region);
- free_region(invalid_region);
+ if (!os::numa_has_static_binding()) {
+ MemRegion invalid_region = ls->invalid_region().intersection(new_region);
+ // Invalid region is a range of memory that could've possibly
+ // been allocated on the other node. That's relevant only on Solaris where
+ // there is no static memory binding.
+ if (!invalid_region.is_empty()) {
+ merge_regions(new_region, &intersection, &invalid_region);
+ free_region(invalid_region);
+ ls->set_invalid_region(MemRegion());
+ }
}
+
select_tails(new_region, intersection, &bottom_region, &top_region);
- free_region(bottom_region);
- free_region(top_region);
+
+ if (!os::numa_has_static_binding()) {
+ // If that's a system with the first-touch policy then it's enough
+ // to free the pages.
+ free_region(bottom_region);
+ free_region(top_region);
+ } else {
+ // In a system with static binding we have to change the bias whenever
+ // we reshape the heap.
+ bias_region(bottom_region, ls->lgrp_id());
+ bias_region(top_region, ls->lgrp_id());
+ }
// If we clear the region, we would mangle it in debug. That would cause page
// allocation in a different place. Hence setting the top directly.
s->initialize(new_region, false);
s->set_top(s->bottom());
- ls->set_invalid_region(MemRegion());
-
set_adaptation_cycles(samples_count());
}
}
@@ -575,7 +602,7 @@
HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
if (s->contains(value)) {
- if (top < value && top < s->end()) {
+ if (!os::numa_has_static_binding() && top < value && top < s->end()) {
ls->add_invalid_region(MemRegion(top, value));
}
s->set_top(value);
@@ -584,10 +611,10 @@
if (found_top) {
s->set_top(s->bottom());
} else {
- if (top < s->end()) {
- ls->add_invalid_region(MemRegion(top, s->end()));
- }
- s->set_top(s->end());
+ if (!os::numa_has_static_binding() && top < s->end()) {
+ ls->add_invalid_region(MemRegion(top, s->end()));
+ }
+ s->set_top(s->end());
}
}
}
@@ -601,11 +628,23 @@
}
}
+/*
+ Linux supports static memory binding, therefore most of the
+ logic dealing with possible invalid page allocation is effectively
+ disabled. Besides, there is no notion of a home node in Linux: a
+ thread is allowed to migrate freely, although the scheduler is rather
+ reluctant to move threads between nodes. We check for the current
+ node on every allocation. With high probability a thread stays on
+ the same node for some time, allowing local access to recently allocated
+ objects.
+ */
+
HeapWord* MutableNUMASpace::allocate(size_t size) {
- int lgrp_id = Thread::current()->lgrp_id();
- if (lgrp_id == -1) {
+ Thread* thr = Thread::current();
+ int lgrp_id = thr->lgrp_id();
+ if (lgrp_id == -1 || !os::numa_has_group_homing()) {
lgrp_id = os::numa_get_group_id();
- Thread::current()->set_lgrp_id(lgrp_id);
+ thr->set_lgrp_id(lgrp_id);
}
int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
@@ -628,22 +667,22 @@
MutableSpace::set_top(s->top());
}
}
- // Make the page allocation happen here.
- if (p != NULL) {
+ // Make the page allocation happen here if there is no static binding.
+ if (p != NULL && !os::numa_has_static_binding()) {
for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
*(int*)i = 0;
}
}
-
return p;
}
// This version is lock-free.
HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
- int lgrp_id = Thread::current()->lgrp_id();
- if (lgrp_id == -1) {
+ Thread* thr = Thread::current();
+ int lgrp_id = thr->lgrp_id();
+ if (lgrp_id == -1 || !os::numa_has_group_homing()) {
lgrp_id = os::numa_get_group_id();
- Thread::current()->set_lgrp_id(lgrp_id);
+ thr->set_lgrp_id(lgrp_id);
}
int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
@@ -670,8 +709,8 @@
}
}
- // Make the page allocation happen here.
- if (p != NULL) {
+ // Make the page allocation happen here if there is no static binding.
+ if (p != NULL && !os::numa_has_static_binding() ) {
for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
*(int*)i = 0;
}
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp Sun May 04 03:29:31 2008 -0700
@@ -139,8 +139,8 @@
// Check if the NUMA topology has changed. Add and remove spaces if needed.
// The update can be forced by setting the force parameter equal to true.
bool update_layout(bool force);
- // Bias region towards the first-touching lgrp.
- void bias_region(MemRegion mr);
+ // Bias region towards the lgrp.
+ void bias_region(MemRegion mr, int lgrp_id);
// Free pages in a given region.
void free_region(MemRegion mr);
// Get current chunk size.
--- a/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp Sun May 04 03:29:31 2008 -0700
@@ -144,3 +144,18 @@
gch->do_full_collection(gch->must_clear_all_soft_refs(), _max_level);
notify_gc_end();
}
+
+void VM_GenCollectForPermanentAllocation::doit() {
+ JvmtiGCForAllocationMarker jgcm;
+ notify_gc_begin(true);
+ GenCollectedHeap* gch = GenCollectedHeap::heap();
+ GCCauseSetter gccs(gch, _gc_cause);
+ gch->do_full_collection(gch->must_clear_all_soft_refs(),
+ gch->n_gens() - 1);
+ _res = gch->perm_gen()->allocate(_size, false);
+ assert(gch->is_in_reserved_or_null(_res), "result not in heap");
+ if (_res == NULL && GC_locker::is_active_and_needs_gc()) {
+ set_gc_locked();
+ }
+ notify_gc_end();
+}
--- a/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.hpp Sun May 04 03:29:31 2008 -0700
@@ -43,6 +43,7 @@
// is specified; and also the attach "inspectheap" operation
//
// VM_GenCollectForAllocation
+// VM_GenCollectForPermanentAllocation
// VM_ParallelGCFailedAllocation
// VM_ParallelGCFailedPermanentAllocation
// - this operation is invoked when allocation is failed;
@@ -166,3 +167,23 @@
virtual VMOp_Type type() const { return VMOp_GenCollectFull; }
virtual void doit();
};
+
+class VM_GenCollectForPermanentAllocation: public VM_GC_Operation {
+ private:
+ HeapWord* _res;
+ size_t _size; // size of object to be allocated
+ public:
+ VM_GenCollectForPermanentAllocation(size_t size,
+ unsigned int gc_count_before,
+ unsigned int full_gc_count_before,
+ GCCause::Cause gc_cause)
+ : VM_GC_Operation(gc_count_before, full_gc_count_before, true),
+ _size(size) {
+ _res = NULL;
+ _gc_cause = gc_cause;
+ }
+ ~VM_GenCollectForPermanentAllocation() {}
+ virtual VMOp_Type type() const { return VMOp_GenCollectForPermanentAllocation; }
+ virtual void doit();
+ HeapWord* result() const { return _res; }
+};
--- a/hotspot/src/share/vm/includeDB_core Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/includeDB_core Sun May 04 03:29:31 2008 -0700
@@ -718,6 +718,11 @@
ciObjArray.cpp ciUtilities.hpp
ciObjArray.cpp objArrayOop.hpp
+ciObjArray.cpp ciObjArray.hpp
+ciObjArray.cpp ciNullObject.hpp
+ciObjArray.cpp ciUtilities.hpp
+ciObjArray.cpp objArrayOop.hpp
+
ciObjArrayKlass.cpp ciInstanceKlass.hpp
ciObjArrayKlass.cpp ciObjArrayKlass.hpp
ciObjArrayKlass.cpp ciObjArrayKlassKlass.hpp
@@ -1662,6 +1667,7 @@
gcLocker.cpp gcLocker.inline.hpp
gcLocker.cpp sharedHeap.hpp
+gcLocker.cpp resourceArea.hpp
gcLocker.hpp collectedHeap.hpp
gcLocker.hpp genCollectedHeap.hpp
@@ -3094,13 +3100,14 @@
oopMap.cpp signature.hpp
oopMap.hpp allocation.hpp
+oopMapCache.cpp jvmtiRedefineClassesTrace.hpp
oopMap.hpp compressedStream.hpp
oopMap.hpp growableArray.hpp
oopMap.hpp vmreg.hpp
oopMapCache.cpp allocation.inline.hpp
+oopMapCache.cpp jvmtiRedefineClassesTrace.hpp
oopMapCache.cpp handles.inline.hpp
-oopMapCache.cpp jvmtiRedefineClassesTrace.hpp
oopMapCache.cpp oop.inline.hpp
oopMapCache.cpp oopMapCache.hpp
oopMapCache.cpp resourceArea.hpp
@@ -3207,6 +3214,7 @@
os_<os_family>.cpp extendedPC.hpp
os_<os_family>.cpp filemap.hpp
os_<os_family>.cpp globals.hpp
+os_<os_family>.cpp growableArray.hpp
os_<os_family>.cpp hpi.hpp
os_<os_family>.cpp icBuffer.hpp
os_<os_family>.cpp interfaceSupport.hpp
@@ -3348,6 +3356,10 @@
permGen.cpp oop.inline.hpp
permGen.cpp permGen.hpp
permGen.cpp universe.hpp
+permGen.cpp gcLocker.hpp
+permGen.cpp gcLocker.inline.hpp
+permGen.cpp vmGCOperations.hpp
+permGen.cpp vmThread.hpp
permGen.hpp gcCause.hpp
permGen.hpp generation.hpp
--- a/hotspot/src/share/vm/memory/gcLocker.cpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/memory/gcLocker.cpp Sun May 04 03:29:31 2008 -0700
@@ -32,6 +32,12 @@
void GC_locker::stall_until_clear() {
assert(!JavaThread::current()->in_critical(), "Would deadlock");
+ if (PrintJNIGCStalls && PrintGCDetails) {
+ ResourceMark rm; // JavaThread::name() allocates to convert to UTF8
+ gclog_or_tty->print_cr(
+ "Allocation failed. Thread \"%s\" is stalled by JNI critical section.",
+ JavaThread::current()->name());
+ }
MutexLocker ml(JNICritical_lock);
// Wait for _needs_gc to be cleared
while (GC_locker::needs_gc()) {
--- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp Sun May 04 03:29:31 2008 -0700
@@ -35,6 +35,7 @@
friend class CMSCollector;
friend class GenMarkSweep;
friend class VM_GenCollectForAllocation;
+ friend class VM_GenCollectForPermanentAllocation;
friend class VM_GenCollectFull;
friend class VM_GenCollectFullConcurrent;
friend class VM_GC_HeapInspection;
--- a/hotspot/src/share/vm/memory/permGen.cpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/memory/permGen.cpp Sun May 04 03:29:31 2008 -0700
@@ -25,6 +25,70 @@
#include "incls/_precompiled.incl"
#include "incls/_permGen.cpp.incl"
+HeapWord* PermGen::mem_allocate_in_gen(size_t size, Generation* gen) {
+ MutexLocker ml(Heap_lock); // Heap_lock is taken for the retry loop; the MutexUnlocker scopes below give it up temporarily
+ GCCause::Cause next_cause = GCCause::_permanent_generation_full; // cause passed to the next collection request
+ GCCause::Cause prev_cause = GCCause::_no_gc; // cause of the last collection that ran here without yielding memory; _no_gc = none yet
+ // Each pass: allocate in gen, else (conditionally) expand, else request a collection and retry.
+ for (;;) {
+ HeapWord* obj = gen->allocate(size, false);
+ if (obj != NULL) {
+ return obj;
+ }
+ if (gen->capacity() < _capacity_expansion_limit ||
+ prev_cause != GCCause::_no_gc) { // expand while below the limit, or regardless of it once a collection has been tried
+ obj = gen->expand_and_allocate(size, false);
+ }
+ if (obj == NULL && prev_cause != GCCause::_last_ditch_collection) { // still nothing and the last-ditch collection has not run yet
+ if (GC_locker::is_active_and_needs_gc()) {
+ // If this thread is not in a jni critical section, we stall
+ // the requestor until the critical section has cleared and
+ // GC allowed. When the critical section clears, a GC is
+ // initiated by the last thread exiting the critical section; so
+ // we retry the allocation sequence from the beginning of the loop,
+ // rather than causing more, now probably unnecessary, GC attempts.
+ JavaThread* jthr = JavaThread::current();
+ if (!jthr->in_critical()) {
+ MutexUnlocker mul(Heap_lock); // do not hold Heap_lock while stalled
+ // Wait for JNI critical section to be exited
+ GC_locker::stall_until_clear();
+ continue;
+ } else {
+ if (CheckJNICalls) {
+ fatal("Possible deadlock due to allocating while"
+ " in jni critical section");
+ }
+ return NULL; // this thread is itself in a critical section, so it cannot stall; the allocation fails
+ }
+ }
+
+ // Read the GC count while holding the Heap_lock
+ unsigned int gc_count_before = SharedHeap::heap()->total_collections();
+ unsigned int full_gc_count_before = SharedHeap::heap()->total_full_collections();
+ {
+ MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back
+ VM_GenCollectForPermanentAllocation op(size, gc_count_before, full_gc_count_before,
+ next_cause);
+ VMThread::execute(&op);
+ if (!op.prologue_succeeded() || op.gc_locked()) { // the operation reports it did not get to collect
+ assert(op.result() == NULL, "must be NULL if gc_locked() is true");
+ continue; // retry and/or stall as necessary
+ }
+ obj = op.result(); // non-NULL when the operation itself produced the memory
+ assert(obj == NULL || SharedHeap::heap()->is_in_reserved(obj),
+ "result not in heap");
+ if (obj != NULL) {
+ return obj;
+ }
+ }
+ prev_cause = next_cause; // record that a collection with this cause has been tried
+ next_cause = GCCause::_last_ditch_collection; // any further request escalates to the last-ditch collection
+ } else {
+ return obj; // either expansion succeeded, or obj is NULL and the last-ditch collection has already been tried
+ }
+ }
+}
+
CompactingPermGen::CompactingPermGen(ReservedSpace rs,
ReservedSpace shared_rs,
size_t initial_byte_size,
@@ -44,40 +108,7 @@
}
HeapWord* CompactingPermGen::mem_allocate(size_t size) {
- MutexLocker ml(Heap_lock);
- HeapWord* obj = _gen->allocate(size, false);
- bool tried_collection = false;
- bool tried_expansion = false;
- while (obj == NULL) {
- if (_gen->capacity() >= _capacity_expansion_limit || tried_expansion) {
- // Expansion limit reached, try collection before expanding further
- // For now we force a full collection, this could be changed
- SharedHeap::heap()->collect_locked(GCCause::_permanent_generation_full);
- obj = _gen->allocate(size, false);
- tried_collection = true;
- tried_expansion = false; // ... following the collection:
- // the collection may have shrunk the space.
- }
- if (obj == NULL && !tried_expansion) {
- obj = _gen->expand_and_allocate(size, false);
- tried_expansion = true;
- }
- if (obj == NULL && tried_collection && tried_expansion) {
- // We have not been able to allocate despite a collection and
- // an attempted space expansion. We now make a last-ditch collection
- // attempt that will try to reclaim as much space as possible (for
- // example by aggressively clearing all soft refs).
- SharedHeap::heap()->collect_locked(GCCause::_last_ditch_collection);
- obj = _gen->allocate(size, false);
- if (obj == NULL) {
- // An expansion attempt is necessary since the previous
- // collection may have shrunk the space.
- obj = _gen->expand_and_allocate(size, false);
- }
- break;
- }
- }
- return obj;
+ return mem_allocate_in_gen(size, _gen); // allocate/expand/collect policy now lives in PermGen::mem_allocate_in_gen
}
void CompactingPermGen::compute_new_size() {
--- a/hotspot/src/share/vm/memory/permGen.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/memory/permGen.hpp Sun May 04 03:29:31 2008 -0700
@@ -38,6 +38,8 @@
size_t _capacity_expansion_limit; // maximum expansion allowed without a
// full gc occuring
+ HeapWord* mem_allocate_in_gen(size_t size, Generation* gen);
+
public:
enum Name {
MarkSweepCompact, MarkSweep, ConcurrentMarkSweep
--- a/hotspot/src/share/vm/runtime/globals.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp Sun May 04 03:29:31 2008 -0700
@@ -1928,6 +1928,10 @@
develop(bool, IgnoreLibthreadGPFault, false, \
"Suppress workaround for libthread GP fault") \
\
+ product(bool, PrintJNIGCStalls, false, \
+ "Print diagnostic message when GC is stalled " \
+ "by JNI critical section") \
+ \
/* JVMTI heap profiling */ \
\
diagnostic(bool, TraceJVMTIObjectTagging, false, \
--- a/hotspot/src/share/vm/runtime/os.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/runtime/os.hpp Sun May 04 03:29:31 2008 -0700
@@ -33,6 +33,7 @@
class Event;
class DLL;
class FileHandle;
+template<class E> class GrowableArray;
// %%%%% Moved ThreadState, START_FN, OSThread to new osThread.hpp. -- Rose
@@ -206,7 +207,9 @@
static void realign_memory(char *addr, size_t bytes, size_t alignment_hint);
// NUMA-specific interface
- static void numa_make_local(char *addr, size_t bytes);
+ static bool numa_has_static_binding();
+ static bool numa_has_group_homing();
+ static void numa_make_local(char *addr, size_t bytes, int lgrp_hint);
static void numa_make_global(char *addr, size_t bytes);
static size_t numa_get_groups_num();
static size_t numa_get_leaf_groups(int *ids, size_t size);
--- a/hotspot/src/share/vm/runtime/vm_operations.hpp Fri May 02 08:22:11 2008 -0700
+++ b/hotspot/src/share/vm/runtime/vm_operations.hpp Sun May 04 03:29:31 2008 -0700
@@ -49,6 +49,7 @@
template(GenCollectFull) \
template(GenCollectFullConcurrent) \
template(GenCollectForAllocation) \
+ template(GenCollectForPermanentAllocation) \
template(ParallelGCFailedAllocation) \
template(ParallelGCFailedPermanentAllocation) \
template(ParallelGCSystemGC) \