src/hotspot/os/linux/os_linux.cpp
branch ihse-cflags-rewrite-branch
changeset 58665:30a5049a36bb
parent 56900:d5d542d50e3c
parent 58654:562bf1878089
56900:d5d542d50e3c 58665:30a5049a36bb
     1 /*
     1 /*
     2  * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
     2  * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     7  * published by the Free Software Foundation.
    31 #include "code/vtableStubs.hpp"
    31 #include "code/vtableStubs.hpp"
    32 #include "compiler/compileBroker.hpp"
    32 #include "compiler/compileBroker.hpp"
    33 #include "compiler/disassembler.hpp"
    33 #include "compiler/disassembler.hpp"
    34 #include "interpreter/interpreter.hpp"
    34 #include "interpreter/interpreter.hpp"
    35 #include "logging/log.hpp"
    35 #include "logging/log.hpp"
       
    36 #include "logging/logStream.hpp"
    36 #include "memory/allocation.inline.hpp"
    37 #include "memory/allocation.inline.hpp"
    37 #include "memory/filemap.hpp"
    38 #include "memory/filemap.hpp"
    38 #include "oops/oop.inline.hpp"
    39 #include "oops/oop.inline.hpp"
    39 #include "os_linux.inline.hpp"
    40 #include "os_linux.inline.hpp"
       
    41 #include "os_posix.inline.hpp"
    40 #include "os_share_linux.hpp"
    42 #include "os_share_linux.hpp"
    41 #include "osContainer_linux.hpp"
    43 #include "osContainer_linux.hpp"
    42 #include "prims/jniFastGetField.hpp"
    44 #include "prims/jniFastGetField.hpp"
    43 #include "prims/jvm_misc.hpp"
    45 #include "prims/jvm_misc.hpp"
    44 #include "runtime/arguments.hpp"
    46 #include "runtime/arguments.hpp"
    59 #include "runtime/stubRoutines.hpp"
    61 #include "runtime/stubRoutines.hpp"
    60 #include "runtime/thread.inline.hpp"
    62 #include "runtime/thread.inline.hpp"
    61 #include "runtime/threadCritical.hpp"
    63 #include "runtime/threadCritical.hpp"
    62 #include "runtime/threadSMR.hpp"
    64 #include "runtime/threadSMR.hpp"
    63 #include "runtime/timer.hpp"
    65 #include "runtime/timer.hpp"
       
    66 #include "runtime/vm_version.hpp"
    64 #include "semaphore_posix.hpp"
    67 #include "semaphore_posix.hpp"
    65 #include "services/attachListener.hpp"
    68 #include "services/attachListener.hpp"
    66 #include "services/memTracker.hpp"
    69 #include "services/memTracker.hpp"
    67 #include "services/runtimeService.hpp"
    70 #include "services/runtimeService.hpp"
    68 #include "utilities/align.hpp"
    71 #include "utilities/align.hpp"
    79 # include <sys/mman.h>
    82 # include <sys/mman.h>
    80 # include <sys/stat.h>
    83 # include <sys/stat.h>
    81 # include <sys/select.h>
    84 # include <sys/select.h>
    82 # include <pthread.h>
    85 # include <pthread.h>
    83 # include <signal.h>
    86 # include <signal.h>
       
    87 # include <endian.h>
    84 # include <errno.h>
    88 # include <errno.h>
    85 # include <dlfcn.h>
    89 # include <dlfcn.h>
    86 # include <stdio.h>
    90 # include <stdio.h>
    87 # include <unistd.h>
    91 # include <unistd.h>
    88 # include <sys/resource.h>
    92 # include <sys/resource.h>
   126 #define MAX_SECS 100000000
   130 #define MAX_SECS 100000000
   127 
   131 
   128 // for timer info max values which include all bits
   132 // for timer info max values which include all bits
   129 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
   133 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
   130 
   134 
   131 #define LARGEPAGES_BIT (1 << 6)
   135 enum CoredumpFilterBit {
   132 #define DAX_SHARED_BIT (1 << 8)
   136   FILE_BACKED_PVT_BIT = 1 << 2,
       
   137   FILE_BACKED_SHARED_BIT = 1 << 3,
       
   138   LARGEPAGES_BIT = 1 << 6,
       
   139   DAX_SHARED_BIT = 1 << 8
       
   140 };
       
   141 
   133 ////////////////////////////////////////////////////////////////////////////////
   142 ////////////////////////////////////////////////////////////////////////////////
   134 // global variables
   143 // global variables
   135 julong os::Linux::_physical_memory = 0;
   144 julong os::Linux::_physical_memory = 0;
   136 
   145 
   137 address   os::Linux::_initial_thread_stack_bottom = NULL;
   146 address   os::Linux::_initial_thread_stack_bottom = NULL;
   138 uintptr_t os::Linux::_initial_thread_stack_size   = 0;
   147 uintptr_t os::Linux::_initial_thread_stack_size   = 0;
   139 
   148 
   140 int (*os::Linux::_clock_gettime)(clockid_t, struct timespec *) = NULL;
       
   141 int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL;
   149 int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL;
   142 int (*os::Linux::_pthread_setname_np)(pthread_t, const char*) = NULL;
   150 int (*os::Linux::_pthread_setname_np)(pthread_t, const char*) = NULL;
   143 Mutex* os::Linux::_createThread_lock = NULL;
       
   144 pthread_t os::Linux::_main_thread;
   151 pthread_t os::Linux::_main_thread;
   145 int os::Linux::_page_size = -1;
   152 int os::Linux::_page_size = -1;
   146 bool os::Linux::_supports_fast_thread_cpu_time = false;
   153 bool os::Linux::_supports_fast_thread_cpu_time = false;
   147 uint32_t os::Linux::_os_version = 0;
       
   148 const char * os::Linux::_glibc_version = NULL;
   154 const char * os::Linux::_glibc_version = NULL;
   149 const char * os::Linux::_libpthread_version = NULL;
   155 const char * os::Linux::_libpthread_version = NULL;
   150 
   156 
   151 static jlong initial_time_count=0;
   157 static jlong initial_time_count=0;
   152 
   158 
   217   }
   223   }
   218 
   224 
   219   phys_mem = Linux::physical_memory();
   225   phys_mem = Linux::physical_memory();
   220   log_trace(os)("total system memory: " JLONG_FORMAT, phys_mem);
   226   log_trace(os)("total system memory: " JLONG_FORMAT, phys_mem);
   221   return phys_mem;
   227   return phys_mem;
       
   228 }
       
   229 
       
   230 static uint64_t initial_total_ticks = 0;
       
   231 static uint64_t initial_steal_ticks = 0;
       
   232 static bool     has_initial_tick_info = false;
       
   233 
       
   234 static void next_line(FILE *f) {
       
   235   int c;
       
   236   do {
       
   237     c = fgetc(f);
       
   238   } while (c != '\n' && c != EOF);
       
   239 }
       
   240 
       
   241 bool os::Linux::get_tick_information(CPUPerfTicks* pticks, int which_logical_cpu) {
       
   242   FILE*         fh;
       
   243   uint64_t      userTicks, niceTicks, systemTicks, idleTicks;
       
   244   // since at least kernel 2.6 : iowait: time waiting for I/O to complete
       
   245   // irq: time  servicing interrupts; softirq: time servicing softirqs
       
   246   uint64_t      iowTicks = 0, irqTicks = 0, sirqTicks= 0;
       
   247   // steal (since kernel 2.6.11): time spent in other OS when running in a virtualized environment
       
   248   uint64_t      stealTicks = 0;
       
   249   // guest (since kernel 2.6.24): time spent running a virtual CPU for guest OS under the
       
   250   // control of the Linux kernel
       
   251   uint64_t      guestNiceTicks = 0;
       
   252   int           logical_cpu = -1;
       
   253   const int     required_tickinfo_count = (which_logical_cpu == -1) ? 4 : 5;
       
   254   int           n;
       
   255 
       
   256   memset(pticks, 0, sizeof(CPUPerfTicks));
       
   257 
       
   258   if ((fh = fopen("/proc/stat", "r")) == NULL) {
       
   259     return false;
       
   260   }
       
   261 
       
   262   if (which_logical_cpu == -1) {
       
   263     n = fscanf(fh, "cpu " UINT64_FORMAT " " UINT64_FORMAT " " UINT64_FORMAT " "
       
   264             UINT64_FORMAT " " UINT64_FORMAT " " UINT64_FORMAT " " UINT64_FORMAT " "
       
   265             UINT64_FORMAT " " UINT64_FORMAT " ",
       
   266             &userTicks, &niceTicks, &systemTicks, &idleTicks,
       
   267             &iowTicks, &irqTicks, &sirqTicks,
       
   268             &stealTicks, &guestNiceTicks);
       
   269   } else {
       
   270     // Move to next line
       
   271     next_line(fh);
       
   272 
       
   273     // find the line for requested cpu faster to just iterate linefeeds?
       
   274     for (int i = 0; i < which_logical_cpu; i++) {
       
   275       next_line(fh);
       
   276     }
       
   277 
       
   278     n = fscanf(fh, "cpu%u " UINT64_FORMAT " " UINT64_FORMAT " " UINT64_FORMAT " "
       
   279                UINT64_FORMAT " " UINT64_FORMAT " " UINT64_FORMAT " " UINT64_FORMAT " "
       
   280                UINT64_FORMAT " " UINT64_FORMAT " ",
       
   281                &logical_cpu, &userTicks, &niceTicks,
       
   282                &systemTicks, &idleTicks, &iowTicks, &irqTicks, &sirqTicks,
       
   283                &stealTicks, &guestNiceTicks);
       
   284   }
       
   285 
       
   286   fclose(fh);
       
   287   if (n < required_tickinfo_count || logical_cpu != which_logical_cpu) {
       
   288     return false;
       
   289   }
       
   290   pticks->used       = userTicks + niceTicks;
       
   291   pticks->usedKernel = systemTicks + irqTicks + sirqTicks;
       
   292   pticks->total      = userTicks + niceTicks + systemTicks + idleTicks +
       
   293                        iowTicks + irqTicks + sirqTicks + stealTicks + guestNiceTicks;
       
   294 
       
   295   if (n > required_tickinfo_count + 3) {
       
   296     pticks->steal = stealTicks;
       
   297     pticks->has_steal_ticks = true;
       
   298   } else {
       
   299     pticks->steal = 0;
       
   300     pticks->has_steal_ticks = false;
       
   301   }
       
   302 
       
   303   return true;
   222 }
   304 }
   223 
   305 
   224 // Return true if user is running as root.
   306 // Return true if user is running as root.
   225 
   307 
   226 bool os::have_special_privileges() {
   308 bool os::have_special_privileges() {
   266   return (pid_t)rslt;
   348   return (pid_t)rslt;
   267 }
   349 }
   268 
   350 
   269 // Most versions of linux have a bug where the number of processors are
   351 // Most versions of linux have a bug where the number of processors are
   270 // determined by looking at the /proc file system.  In a chroot environment,
   352 // determined by looking at the /proc file system.  In a chroot environment,
   271 // the system call returns 1.  This causes the VM to act as if it is
   353 // the system call returns 1.
   272 // a single processor and elide locking (see is_MP() call).
       
   273 static bool unsafe_chroot_detected = false;
   354 static bool unsafe_chroot_detected = false;
   274 static const char *unstable_chroot_error = "/proc file system not found.\n"
   355 static const char *unstable_chroot_error = "/proc file system not found.\n"
   275                      "Java may be unstable running multithreaded in a chroot "
   356                      "Java may be unstable running multithreaded in a chroot "
   276                      "environment on Linux when /proc filesystem is not mounted.";
   357                      "environment on Linux when /proc filesystem is not mounted.";
   277 
   358 
   323   //      The linker uses the following search paths to locate required
   404   //      The linker uses the following search paths to locate required
   324   //      shared libraries:
   405   //      shared libraries:
   325   //        1: ...
   406   //        1: ...
   326   //        ...
   407   //        ...
   327   //        7: The default directories, normally /lib and /usr/lib.
   408   //        7: The default directories, normally /lib and /usr/lib.
   328 #if defined(AMD64) || (defined(_LP64) && defined(SPARC)) || defined(PPC64) || defined(S390)
   409 #ifndef OVERRIDE_LIBPATH
   329   #define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib"
   410   #if defined(AMD64) || (defined(_LP64) && defined(SPARC)) || defined(PPC64) || defined(S390)
       
   411     #define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib"
       
   412   #else
       
   413     #define DEFAULT_LIBPATH "/lib:/usr/lib"
       
   414   #endif
   330 #else
   415 #else
   331   #define DEFAULT_LIBPATH "/lib:/usr/lib"
   416   #define DEFAULT_LIBPATH OVERRIDE_LIBPATH
   332 #endif
   417 #endif
   333 
   418 
   334 // Base path of extensions installed on the system.
   419 // Base path of extensions installed on the system.
   335 #define SYS_EXT_DIR     "/usr/java/packages"
   420 #define SYS_EXT_DIR     "/usr/java/packages"
   336 #define EXTENSIONS_DIR  "/lib/ext"
   421 #define EXTENSIONS_DIR  "/lib/ext"
   339   // Note that the space for the colon and the trailing null are provided
   424   // Note that the space for the colon and the trailing null are provided
   340   // by the nulls included by the sizeof operator.
   425   // by the nulls included by the sizeof operator.
   341   const size_t bufsize =
   426   const size_t bufsize =
   342     MAX2((size_t)MAXPATHLEN,  // For dll_dir & friends.
   427     MAX2((size_t)MAXPATHLEN,  // For dll_dir & friends.
   343          (size_t)MAXPATHLEN + sizeof(EXTENSIONS_DIR) + sizeof(SYS_EXT_DIR) + sizeof(EXTENSIONS_DIR)); // extensions dir
   428          (size_t)MAXPATHLEN + sizeof(EXTENSIONS_DIR) + sizeof(SYS_EXT_DIR) + sizeof(EXTENSIONS_DIR)); // extensions dir
   344   char *buf = (char *)NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
   429   char *buf = NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
   345 
   430 
   346   // sysclasspath, java_home, dll_dir
   431   // sysclasspath, java_home, dll_dir
   347   {
   432   {
   348     char *pslash;
   433     char *pslash;
   349     os::jvm_path(buf, bufsize);
   434     os::jvm_path(buf, bufsize);
   388     // addressed).
   473     // addressed).
   389     const char *v = ::getenv("LD_LIBRARY_PATH");
   474     const char *v = ::getenv("LD_LIBRARY_PATH");
   390     const char *v_colon = ":";
   475     const char *v_colon = ":";
   391     if (v == NULL) { v = ""; v_colon = ""; }
   476     if (v == NULL) { v = ""; v_colon = ""; }
   392     // That's +1 for the colon and +1 for the trailing '\0'.
   477     // That's +1 for the colon and +1 for the trailing '\0'.
   393     char *ld_library_path = (char *)NEW_C_HEAP_ARRAY(char,
   478     char *ld_library_path = NEW_C_HEAP_ARRAY(char,
   394                                                      strlen(v) + 1 +
   479                                              strlen(v) + 1 +
   395                                                      sizeof(SYS_EXT_DIR) + sizeof("/lib/") + sizeof(DEFAULT_LIBPATH) + 1,
   480                                              sizeof(SYS_EXT_DIR) + sizeof("/lib/") + sizeof(DEFAULT_LIBPATH) + 1,
   396                                                      mtInternal);
   481                                              mtInternal);
   397     sprintf(ld_library_path, "%s%s" SYS_EXT_DIR "/lib:" DEFAULT_LIBPATH, v, v_colon);
   482     sprintf(ld_library_path, "%s%s" SYS_EXT_DIR "/lib:" DEFAULT_LIBPATH, v, v_colon);
   398     Arguments::set_library_path(ld_library_path);
   483     Arguments::set_library_path(ld_library_path);
   399     FREE_C_HEAP_ARRAY(char, ld_library_path);
   484     FREE_C_HEAP_ARRAY(char, ld_library_path);
   400   }
   485   }
   401 
   486 
   649 //////////////////////////////////////////////////////////////////////////////
   734 //////////////////////////////////////////////////////////////////////////////
   650 // create new thread
   735 // create new thread
   651 
   736 
   652 // Thread start routine for all newly created threads
   737 // Thread start routine for all newly created threads
   653 static void *thread_native_entry(Thread *thread) {
   738 static void *thread_native_entry(Thread *thread) {
       
   739 
       
   740   thread->record_stack_base_and_size();
       
   741 
   654   // Try to randomize the cache line index of hot stack frames.
   742   // Try to randomize the cache line index of hot stack frames.
   655   // This helps when threads of the same stack traces evict each other's
   743   // This helps when threads of the same stack traces evict each other's
   656   // cache lines. The threads can be either from the same JVM instance, or
   744   // cache lines. The threads can be either from the same JVM instance, or
   657   // from different JVM instances. The benefit is especially true for
   745   // from different JVM instances. The benefit is especially true for
   658   // processors with hyperthreading technology.
   746   // processors with hyperthreading technology.
   682   // initialize floating point control register
   770   // initialize floating point control register
   683   os::Linux::init_thread_fpu_state();
   771   os::Linux::init_thread_fpu_state();
   684 
   772 
   685   // handshaking with parent thread
   773   // handshaking with parent thread
   686   {
   774   {
   687     MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag);
   775     MutexLocker ml(sync, Mutex::_no_safepoint_check_flag);
   688 
   776 
   689     // notify parent thread
   777     // notify parent thread
   690     osthread->set_state(INITIALIZED);
   778     osthread->set_state(INITIALIZED);
   691     sync->notify_all();
   779     sync->notify_all();
   692 
   780 
   693     // wait until os::start_thread()
   781     // wait until os::start_thread()
   694     while (osthread->get_state() == INITIALIZED) {
   782     while (osthread->get_state() == INITIALIZED) {
   695       sync->wait(Mutex::_no_safepoint_check_flag);
   783       sync->wait_without_safepoint_check();
   696     }
   784     }
   697   }
   785   }
       
   786 
       
   787   assert(osthread->pthread_id() != 0, "pthread_id was not set as expected");
   698 
   788 
   699   // call one more level start routine
   789   // call one more level start routine
   700   thread->run();
   790   thread->call_run();
       
   791 
       
   792   // Note: at this point the thread object may already have deleted itself.
       
   793   // Prevent dereferencing it from here on out.
       
   794   thread = NULL;
   701 
   795 
   702   log_info(os, thread)("Thread finished (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").",
   796   log_info(os, thread)("Thread finished (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").",
   703     os::current_thread_id(), (uintx) pthread_self());
   797     os::current_thread_id(), (uintx) pthread_self());
   704 
   798 
   705   // If a thread has not deleted itself ("delete this") as part of its
       
   706   // termination sequence, we have to ensure thread-local-storage is
       
   707   // cleared before we actually terminate. No threads should ever be
       
   708   // deleted asynchronously with respect to their termination.
       
   709   if (Thread::current_or_null_safe() != NULL) {
       
   710     assert(Thread::current_or_null_safe() == thread, "current thread is wrong");
       
   711     thread->clear_thread_current();
       
   712   }
       
   713 
       
   714   return 0;
   799   return 0;
       
   800 }
       
   801 
       
   802 // On Linux, glibc places static TLS blocks (for __thread variables) on
       
   803 // the thread stack. This decreases the stack size actually available
       
   804 // to threads.
       
   805 //
       
   806 // For large static TLS sizes, this may cause threads to malfunction due
       
   807 // to insufficient stack space. This is a well-known issue in glibc:
       
   808 // http://sourceware.org/bugzilla/show_bug.cgi?id=11787.
       
   809 //
       
   810 // As a workaround, we call a private but assumed-stable glibc function,
       
   811 // __pthread_get_minstack() to obtain the minstack size and derive the
       
   812 // static TLS size from it. We then increase the user requested stack
       
   813 // size by this TLS size.
       
   814 //
       
   815 // Due to compatibility concerns, this size adjustment is opt-in and
       
   816 // controlled via AdjustStackSizeForTLS.
       
   817 typedef size_t (*GetMinStack)(const pthread_attr_t *attr);
       
   818 
       
   819 GetMinStack _get_minstack_func = NULL;
       
   820 
       
   821 static void get_minstack_init() {
       
   822   _get_minstack_func =
       
   823         (GetMinStack)dlsym(RTLD_DEFAULT, "__pthread_get_minstack");
       
   824   log_info(os, thread)("Lookup of __pthread_get_minstack %s",
       
   825                        _get_minstack_func == NULL ? "failed" : "succeeded");
       
   826 }
       
   827 
       
   828 // Returns the size of the static TLS area glibc puts on thread stacks.
       
   829 // The value is cached on first use, which occurs when the first thread
       
   830 // is created during VM initialization.
       
   831 static size_t get_static_tls_area_size(const pthread_attr_t *attr) {
       
   832   size_t tls_size = 0;
       
   833   if (_get_minstack_func != NULL) {
       
   834     // Obtain the pthread minstack size by calling __pthread_get_minstack.
       
   835     size_t minstack_size = _get_minstack_func(attr);
       
   836 
       
   837     // Remove non-TLS area size included in minstack size returned
       
   838     // by __pthread_get_minstack() to get the static TLS size.
       
   839     // In glibc before 2.27, minstack size includes guard_size.
       
   840     // In glibc 2.27 and later, guard_size is automatically added
       
   841     // to the stack size by pthread_create and is no longer included
       
   842     // in minstack size. In both cases, the guard_size is taken into
       
   843     // account, so there is no need to adjust the result for that.
       
   844     //
       
   845     // Although __pthread_get_minstack() is a private glibc function,
       
   846     // it is expected to have a stable behavior across future glibc
       
   847     // versions while glibc still allocates the static TLS blocks off
       
   848     // the stack. Following is glibc 2.28 __pthread_get_minstack():
       
   849     //
       
   850     // size_t
       
   851     // __pthread_get_minstack (const pthread_attr_t *attr)
       
   852     // {
       
   853     //   return GLRO(dl_pagesize) + __static_tls_size + PTHREAD_STACK_MIN;
       
   854     // }
       
   855     //
       
   856     //
       
   857     // The following 'minstack_size > os::vm_page_size() + PTHREAD_STACK_MIN'
       
   858     // if check is done for precaution.
       
   859     if (minstack_size > (size_t)os::vm_page_size() + PTHREAD_STACK_MIN) {
       
   860       tls_size = minstack_size - os::vm_page_size() - PTHREAD_STACK_MIN;
       
   861     }
       
   862   }
       
   863 
       
   864   log_info(os, thread)("Stack size adjustment for TLS is " SIZE_FORMAT,
       
   865                        tls_size);
       
   866   return tls_size;
   715 }
   867 }
   716 
   868 
   717 bool os::create_thread(Thread* thread, ThreadType thr_type,
   869 bool os::create_thread(Thread* thread, ThreadType thr_type,
   718                        size_t req_stack_size) {
   870                        size_t req_stack_size) {
   719   assert(thread->osthread() == NULL, "caller responsible");
   871   assert(thread->osthread() == NULL, "caller responsible");
   737   pthread_attr_init(&attr);
   889   pthread_attr_init(&attr);
   738   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
   890   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
   739 
   891 
   740   // Calculate stack size if it's not specified by caller.
   892   // Calculate stack size if it's not specified by caller.
   741   size_t stack_size = os::Posix::get_initial_stack_size(thr_type, req_stack_size);
   893   size_t stack_size = os::Posix::get_initial_stack_size(thr_type, req_stack_size);
   742   // In the Linux NPTL pthread implementation the guard size mechanism
   894   // In glibc versions prior to 2.7 the guard size mechanism
   743   // is not implemented properly. The posix standard requires adding
   895   // is not implemented properly. The posix standard requires adding
   744   // the size of the guard pages to the stack size, instead Linux
   896   // the size of the guard pages to the stack size, instead Linux
   745   // takes the space out of 'stacksize'. Thus we adapt the requested
   897   // takes the space out of 'stacksize'. Thus we adapt the requested
   746   // stack_size by the size of the guard pages to mimick proper
   898   // stack_size by the size of the guard pages to mimick proper
   747   // behaviour. However, be careful not to end up with a size
   899   // behaviour. However, be careful not to end up with a size
   748   // of zero due to overflow. Don't add the guard page in that case.
   900   // of zero due to overflow. Don't add the guard page in that case.
   749   size_t guard_size = os::Linux::default_guard_size(thr_type);
   901   size_t guard_size = os::Linux::default_guard_size(thr_type);
   750   if (stack_size <= SIZE_MAX - guard_size) {
   902   // Configure glibc guard page. Must happen before calling
   751     stack_size += guard_size;
   903   // get_static_tls_area_size(), which uses the guard_size.
       
   904   pthread_attr_setguardsize(&attr, guard_size);
       
   905 
       
   906   size_t stack_adjust_size = 0;
       
   907   if (AdjustStackSizeForTLS) {
       
   908     // Adjust the stack_size for on-stack TLS - see get_static_tls_area_size().
       
   909     stack_adjust_size += get_static_tls_area_size(&attr);
       
   910   } else {
       
   911     stack_adjust_size += guard_size;
       
   912   }
       
   913 
       
   914   stack_adjust_size = align_up(stack_adjust_size, os::vm_page_size());
       
   915   if (stack_size <= SIZE_MAX - stack_adjust_size) {
       
   916     stack_size += stack_adjust_size;
   752   }
   917   }
   753   assert(is_aligned(stack_size, os::vm_page_size()), "stack_size not aligned");
   918   assert(is_aligned(stack_size, os::vm_page_size()), "stack_size not aligned");
   754 
   919 
   755   int status = pthread_attr_setstacksize(&attr, stack_size);
   920   int status = pthread_attr_setstacksize(&attr, stack_size);
   756   assert_status(status == 0, status, "pthread_attr_setstacksize");
   921   assert_status(status == 0, status, "pthread_attr_setstacksize");
   757 
       
   758   // Configure glibc guard page.
       
   759   pthread_attr_setguardsize(&attr, os::Linux::default_guard_size(thr_type));
       
   760 
   922 
   761   ThreadState state;
   923   ThreadState state;
   762 
   924 
   763   {
   925   {
   764     pthread_t tid;
   926     pthread_t tid;
   769       log_info(os, thread)("Thread started (pthread id: " UINTX_FORMAT ", attributes: %s). ",
   931       log_info(os, thread)("Thread started (pthread id: " UINTX_FORMAT ", attributes: %s). ",
   770         (uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr));
   932         (uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr));
   771     } else {
   933     } else {
   772       log_warning(os, thread)("Failed to start thread - pthread_create failed (%s) for attributes: %s.",
   934       log_warning(os, thread)("Failed to start thread - pthread_create failed (%s) for attributes: %s.",
   773         os::errno_name(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr));
   935         os::errno_name(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr));
       
   936       // Log some OS information which might explain why creating the thread failed.
       
   937       log_info(os, thread)("Number of threads approx. running in the VM: %d", Threads::number_of_threads());
       
   938       LogStream st(Log(os, thread)::info());
       
   939       os::Posix::print_rlimit_info(&st);
       
   940       os::print_memory_info(&st);
       
   941       os::Linux::print_proc_sys_info(&st);
       
   942       os::Linux::print_container_info(&st);
   774     }
   943     }
   775 
   944 
   776     pthread_attr_destroy(&attr);
   945     pthread_attr_destroy(&attr);
   777 
   946 
   778     if (ret != 0) {
   947     if (ret != 0) {
   786     osthread->set_pthread_id(tid);
   955     osthread->set_pthread_id(tid);
   787 
   956 
   788     // Wait until child thread is either initialized or aborted
   957     // Wait until child thread is either initialized or aborted
   789     {
   958     {
   790       Monitor* sync_with_child = osthread->startThread_lock();
   959       Monitor* sync_with_child = osthread->startThread_lock();
   791       MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag);
   960       MutexLocker ml(sync_with_child, Mutex::_no_safepoint_check_flag);
   792       while ((state = osthread->get_state()) == ALLOCATED) {
   961       while ((state = osthread->get_state()) == ALLOCATED) {
   793         sync_with_child->wait(Mutex::_no_safepoint_check_flag);
   962         sync_with_child->wait_without_safepoint_check();
   794       }
   963       }
   795     }
   964     }
   796   }
   965   }
   797 
   966 
   798   // Aborted due to thread limit being reached
   967   // Aborted due to thread limit being reached
   880 
  1049 
   881 void os::pd_start_thread(Thread* thread) {
  1050 void os::pd_start_thread(Thread* thread) {
   882   OSThread * osthread = thread->osthread();
  1051   OSThread * osthread = thread->osthread();
   883   assert(osthread->get_state() != INITIALIZED, "just checking");
  1052   assert(osthread->get_state() != INITIALIZED, "just checking");
   884   Monitor* sync_with_child = osthread->startThread_lock();
  1053   Monitor* sync_with_child = osthread->startThread_lock();
   885   MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag);
  1054   MutexLocker ml(sync_with_child, Mutex::_no_safepoint_check_flag);
   886   sync_with_child->notify();
  1055   sync_with_child->notify();
   887 }
  1056 }
   888 
  1057 
   889 // Free Linux resources related to the OSThread
  1058 // Free Linux resources related to the OSThread
   890 void os::free_thread(OSThread* osthread) {
  1059 void os::free_thread(OSThread* osthread) {
  1171 }
  1340 }
  1172 
  1341 
  1173 ////////////////////////////////////////////////////////////////////////////////
  1342 ////////////////////////////////////////////////////////////////////////////////
  1174 // time support
  1343 // time support
  1175 
  1344 
       
  1345 #ifndef SUPPORTS_CLOCK_MONOTONIC
       
  1346 #error "Build platform doesn't support clock_gettime and related functionality"
       
  1347 #endif
       
  1348 
  1176 // Time since start-up in seconds to a fine granularity.
  1349 // Time since start-up in seconds to a fine granularity.
  1177 // Used by VMSelfDestructTimer and the MemProfiler.
  1350 // Used by VMSelfDestructTimer and the MemProfiler.
  1178 double os::elapsedTime() {
  1351 double os::elapsedTime() {
  1179 
  1352 
  1180   return ((double)os::elapsed_counter()) / os::elapsed_frequency(); // nanosecond resolution
  1353   return ((double)os::elapsed_counter()) / os::elapsed_frequency(); // nanosecond resolution
  1187 jlong os::elapsed_frequency() {
  1360 jlong os::elapsed_frequency() {
  1188   return NANOSECS_PER_SEC; // nanosecond resolution
  1361   return NANOSECS_PER_SEC; // nanosecond resolution
  1189 }
  1362 }
  1190 
  1363 
  1191 bool os::supports_vtime() { return true; }
  1364 bool os::supports_vtime() { return true; }
  1192 bool os::enable_vtime()   { return false; }
       
  1193 bool os::vtime_enabled()  { return false; }
       
  1194 
  1365 
  1195 double os::elapsedVTime() {
  1366 double os::elapsedVTime() {
  1196   struct rusage usage;
  1367   struct rusage usage;
  1197   int retval = getrusage(RUSAGE_THREAD, &usage);
  1368   int retval = getrusage(RUSAGE_THREAD, &usage);
  1198   if (retval == 0) {
  1369   if (retval == 0) {
  1216   assert(status != -1, "linux error");
  1387   assert(status != -1, "linux error");
  1217   seconds = jlong(time.tv_sec);
  1388   seconds = jlong(time.tv_sec);
  1218   nanos = jlong(time.tv_usec) * 1000;
  1389   nanos = jlong(time.tv_usec) * 1000;
  1219 }
  1390 }
  1220 
  1391 
  1221 
       
  1222 #ifndef CLOCK_MONOTONIC
       
  1223   #define CLOCK_MONOTONIC (1)
       
  1224 #endif
       
  1225 
       
  1226 void os::Linux::clock_init() {
       
  1227   // we do dlopen's in this particular order due to bug in linux
       
  1228   // dynamical loader (see 6348968) leading to crash on exit
       
  1229   void* handle = dlopen("librt.so.1", RTLD_LAZY);
       
  1230   if (handle == NULL) {
       
  1231     handle = dlopen("librt.so", RTLD_LAZY);
       
  1232   }
       
  1233 
       
  1234   if (handle) {
       
  1235     int (*clock_getres_func)(clockid_t, struct timespec*) =
       
  1236            (int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_getres");
       
  1237     int (*clock_gettime_func)(clockid_t, struct timespec*) =
       
  1238            (int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_gettime");
       
  1239     if (clock_getres_func && clock_gettime_func) {
       
  1240       // See if monotonic clock is supported by the kernel. Note that some
       
  1241       // early implementations simply return kernel jiffies (updated every
       
  1242       // 1/100 or 1/1000 second). It would be bad to use such a low res clock
       
  1243       // for nano time (though the monotonic property is still nice to have).
       
  1244       // It's fixed in newer kernels, however clock_getres() still returns
       
  1245       // 1/HZ. We check if clock_getres() works, but will ignore its reported
       
  1246       // resolution for now. Hopefully as people move to new kernels, this
       
  1247       // won't be a problem.
       
  1248       struct timespec res;
       
  1249       struct timespec tp;
       
  1250       if (clock_getres_func (CLOCK_MONOTONIC, &res) == 0 &&
       
  1251           clock_gettime_func(CLOCK_MONOTONIC, &tp)  == 0) {
       
  1252         // yes, monotonic clock is supported
       
  1253         _clock_gettime = clock_gettime_func;
       
  1254         return;
       
  1255       } else {
       
  1256         // close librt if there is no monotonic clock
       
  1257         dlclose(handle);
       
  1258       }
       
  1259     }
       
  1260   }
       
  1261   warning("No monotonic clock was available - timed services may " \
       
  1262           "be adversely affected if the time-of-day clock changes");
       
  1263 }
       
  1264 
       
  1265 #ifndef SYS_clock_getres
       
  1266   #if defined(X86) || defined(PPC64) || defined(S390)
       
  1267     #define SYS_clock_getres AMD64_ONLY(229) IA32_ONLY(266) PPC64_ONLY(247) S390_ONLY(261)
       
  1268     #define sys_clock_getres(x,y)  ::syscall(SYS_clock_getres, x, y)
       
  1269   #else
       
  1270     #warning "SYS_clock_getres not defined for this platform, disabling fast_thread_cpu_time"
       
  1271     #define sys_clock_getres(x,y)  -1
       
  1272   #endif
       
  1273 #else
       
  1274   #define sys_clock_getres(x,y)  ::syscall(SYS_clock_getres, x, y)
       
  1275 #endif
       
  1276 
       
  1277 void os::Linux::fast_thread_clock_init() {
  1392 void os::Linux::fast_thread_clock_init() {
  1278   if (!UseLinuxPosixThreadCPUClocks) {
  1393   if (!UseLinuxPosixThreadCPUClocks) {
  1279     return;
  1394     return;
  1280   }
  1395   }
  1281   clockid_t clockid;
  1396   clockid_t clockid;
  1282   struct timespec tp;
  1397   struct timespec tp;
  1283   int (*pthread_getcpuclockid_func)(pthread_t, clockid_t *) =
  1398   int (*pthread_getcpuclockid_func)(pthread_t, clockid_t *) =
  1284       (int(*)(pthread_t, clockid_t *)) dlsym(RTLD_DEFAULT, "pthread_getcpuclockid");
  1399       (int(*)(pthread_t, clockid_t *)) dlsym(RTLD_DEFAULT, "pthread_getcpuclockid");
  1285 
  1400 
  1286   // Switch to using fast clocks for thread cpu time if
  1401   // Switch to using fast clocks for thread cpu time if
  1287   // the sys_clock_getres() returns 0 error code.
  1402   // the clock_getres() returns 0 error code.
  1288   // Note, that some kernels may support the current thread
  1403   // Note, that some kernels may support the current thread
  1289   // clock (CLOCK_THREAD_CPUTIME_ID) but not the clocks
  1404   // clock (CLOCK_THREAD_CPUTIME_ID) but not the clocks
  1290   // returned by the pthread_getcpuclockid().
  1405   // returned by the pthread_getcpuclockid().
  1291   // If the fast Posix clocks are supported then the sys_clock_getres()
  1406   // If the fast Posix clocks are supported then the clock_getres()
  1292   // must return at least tp.tv_sec == 0 which means a resolution
  1407   // must return at least tp.tv_sec == 0 which means a resolution
  1293   // better than 1 sec. This is extra check for reliability.
  1408   // better than 1 sec. This is extra check for reliability.
  1294 
  1409 
  1295   if (pthread_getcpuclockid_func &&
  1410   if (pthread_getcpuclockid_func &&
  1296       pthread_getcpuclockid_func(_main_thread, &clockid) == 0 &&
  1411       pthread_getcpuclockid_func(_main_thread, &clockid) == 0 &&
  1297       sys_clock_getres(clockid, &tp) == 0 && tp.tv_sec == 0) {
  1412       os::Posix::clock_getres(clockid, &tp) == 0 && tp.tv_sec == 0) {
  1298     _supports_fast_thread_cpu_time = true;
  1413     _supports_fast_thread_cpu_time = true;
  1299     _pthread_getcpuclockid = pthread_getcpuclockid_func;
  1414     _pthread_getcpuclockid = pthread_getcpuclockid_func;
  1300   }
  1415   }
  1301 }
  1416 }
  1302 
  1417 
  1303 jlong os::javaTimeNanos() {
  1418 jlong os::javaTimeNanos() {
  1304   if (os::supports_monotonic_clock()) {
  1419   if (os::supports_monotonic_clock()) {
  1305     struct timespec tp;
  1420     struct timespec tp;
  1306     int status = Linux::clock_gettime(CLOCK_MONOTONIC, &tp);
  1421     int status = os::Posix::clock_gettime(CLOCK_MONOTONIC, &tp);
  1307     assert(status == 0, "gettime error");
  1422     assert(status == 0, "gettime error");
  1308     jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec);
  1423     jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec);
  1309     return result;
  1424     return result;
  1310   } else {
  1425   } else {
  1311     timeval time;
  1426     timeval time;
  1400 // called from signal handler. Before adding something to os::abort(), make
  1515 // called from signal handler. Before adding something to os::abort(), make
  1401 // sure it is async-safe and can handle partially initialized VM.
  1516 // sure it is async-safe and can handle partially initialized VM.
  1402 void os::abort(bool dump_core, void* siginfo, const void* context) {
  1517 void os::abort(bool dump_core, void* siginfo, const void* context) {
  1403   os::shutdown();
  1518   os::shutdown();
  1404   if (dump_core) {
  1519   if (dump_core) {
       
  1520     if (DumpPrivateMappingsInCore) {
       
  1521       ClassLoader::close_jrt_image();
       
  1522     }
  1405 #ifndef PRODUCT
  1523 #ifndef PRODUCT
  1406     fdStream out(defaultStream::output_fd());
  1524     fdStream out(defaultStream::output_fd());
  1407     out.print_raw("Current thread is ");
  1525     out.print_raw("Current thread is ");
  1408     char buf[16];
  1526     char buf[16];
  1409     jio_snprintf(buf, sizeof(buf), UINTX_FORMAT, os::current_thread_id());
  1527     jio_snprintf(buf, sizeof(buf), UINTX_FORMAT, os::current_thread_id());
  1415 
  1533 
  1416   ::exit(1);
  1534   ::exit(1);
  1417 }
  1535 }
  1418 
  1536 
  1419 // Die immediately, no exit hook, no abort hook, no cleanup.
  1537 // Die immediately, no exit hook, no abort hook, no cleanup.
       
  1538 // Dump a core file, if possible, for debugging.
  1420 void os::die() {
  1539 void os::die() {
  1421   ::abort();
  1540   if (TestUnresponsiveErrorHandler && !CreateCoredumpOnCrash) {
       
  1541     // For TimeoutInErrorHandlingTest.java, we just kill the VM
       
  1542     // and don't take the time to generate a core file.
       
  1543     os::signal_raise(SIGKILL);
       
  1544   } else {
       
  1545     ::abort();
       
  1546   }
  1422 }
  1547 }
  1423 
  1548 
  1424 // thread_id is kernel thread id (similar to Solaris LWP id)
  1549 // thread_id is kernel thread id (similar to Solaris LWP id)
  1425 intx os::current_thread_id() { return os::Linux::gettid(); }
  1550 intx os::current_thread_id() { return os::Linux::gettid(); }
  1426 int os::current_process_id() {
  1551 int os::current_process_id() {
  1608 
  1733 
  1609 void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
  1734 void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
  1610   void * result = NULL;
  1735   void * result = NULL;
  1611   bool load_attempted = false;
  1736   bool load_attempted = false;
  1612 
  1737 
       
  1738   log_info(os)("attempting shared library load of %s", filename);
       
  1739 
  1613   // Check whether the library to load might change execution rights
  1740   // Check whether the library to load might change execution rights
  1614   // of the stack. If they are changed, the protection of the stack
  1741   // of the stack. If they are changed, the protection of the stack
  1615   // guard pages will be lost. We need a safepoint to fix this.
  1742   // guard pages will be lost. We need a safepoint to fix this.
  1616   //
  1743   //
  1617   // See Linux man page execstack(8) for more info.
  1744   // See Linux man page execstack(8) for more info.
  1619     if (!ElfFile::specifies_noexecstack(filename)) {
  1746     if (!ElfFile::specifies_noexecstack(filename)) {
  1620       if (!is_init_completed()) {
  1747       if (!is_init_completed()) {
  1621         os::Linux::_stack_is_executable = true;
  1748         os::Linux::_stack_is_executable = true;
  1622         // This is OK - No Java threads have been created yet, and hence no
  1749         // This is OK - No Java threads have been created yet, and hence no
  1623         // stack guard pages to fix.
  1750         // stack guard pages to fix.
  1624         //
       
  1625         // This should happen only when you are building JDK7 using a very
       
  1626         // old version of JDK6 (e.g., with JPRT) and running test_gamma.
       
  1627         //
  1751         //
  1628         // Dynamic loader will make all stacks executable after
  1752         // Dynamic loader will make all stacks executable after
  1629         // this function returns, and will not do that again.
  1753         // this function returns, and will not do that again.
  1630         assert(Threads::number_of_threads() == 0, "no Java threads should exist yet.");
  1754         assert(Threads::number_of_threads() == 0, "no Java threads should exist yet.");
  1631       } else {
  1755       } else {
  1697   if (failed_to_read_elf_head) {
  1821   if (failed_to_read_elf_head) {
  1698     // file i/o error - report dlerror() msg
  1822     // file i/o error - report dlerror() msg
  1699     return NULL;
  1823     return NULL;
  1700   }
  1824   }
  1701 
  1825 
       
  1826   if (elf_head.e_ident[EI_DATA] != LITTLE_ENDIAN_ONLY(ELFDATA2LSB) BIG_ENDIAN_ONLY(ELFDATA2MSB)) {
       
  1827     // handle invalid/out of range endianness values
       
  1828     if (elf_head.e_ident[EI_DATA] == 0 || elf_head.e_ident[EI_DATA] > 2) {
       
  1829       return NULL;
       
  1830     }
       
  1831 
       
  1832 #if defined(VM_LITTLE_ENDIAN)
       
  1833     // VM is LE, shared object BE
       
  1834     elf_head.e_machine = be16toh(elf_head.e_machine);
       
  1835 #else
       
  1836     // VM is BE, shared object LE
       
  1837     elf_head.e_machine = le16toh(elf_head.e_machine);
       
  1838 #endif
       
  1839   }
       
  1840 
  1702   typedef struct {
  1841   typedef struct {
  1703     Elf32_Half    code;         // Actual value as defined in elf.h
  1842     Elf32_Half    code;         // Actual value as defined in elf.h
  1704     Elf32_Half    compat_class; // Compatibility of archs at VM's sense
  1843     Elf32_Half    compat_class; // Compatibility of archs at VM's sense
  1705     unsigned char elf_class;    // 32 or 64 bit
  1844     unsigned char elf_class;    // 32 or 64 bit
  1706     unsigned char endianess;    // MSB or LSB
  1845     unsigned char endianness;   // MSB or LSB
  1707     char*         name;         // String representation
  1846     char*         name;         // String representation
  1708   } arch_t;
  1847   } arch_t;
  1709 
  1848 
  1710 #ifndef EM_486
  1849 #ifndef EM_486
  1711   #define EM_486          6               /* Intel 80486 */
  1850   #define EM_486          6               /* Intel 80486 */
  1728     {EM_SH,          EM_SH,      ELFCLASS32, ELFDATA2LSB, (char*)"SuperH"},
  1867     {EM_SH,          EM_SH,      ELFCLASS32, ELFDATA2LSB, (char*)"SuperH"},
  1729 #else
  1868 #else
  1730     {EM_PPC64,       EM_PPC64,   ELFCLASS64, ELFDATA2MSB, (char*)"Power PC 64"},
  1869     {EM_PPC64,       EM_PPC64,   ELFCLASS64, ELFDATA2MSB, (char*)"Power PC 64"},
  1731     {EM_SH,          EM_SH,      ELFCLASS32, ELFDATA2MSB, (char*)"SuperH BE"},
  1870     {EM_SH,          EM_SH,      ELFCLASS32, ELFDATA2MSB, (char*)"SuperH BE"},
  1732 #endif
  1871 #endif
  1733     {EM_ARM,         EM_ARM,     ELFCLASS32,   ELFDATA2LSB, (char*)"ARM"},
  1872     {EM_ARM,         EM_ARM,     ELFCLASS32, ELFDATA2LSB, (char*)"ARM"},
  1734     {EM_S390,        EM_S390,    ELFCLASSNONE, ELFDATA2MSB, (char*)"IBM System/390"},
  1873     // we only support 64 bit z architecture
       
  1874     {EM_S390,        EM_S390,    ELFCLASS64, ELFDATA2MSB, (char*)"IBM System/390"},
  1735     {EM_ALPHA,       EM_ALPHA,   ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"},
  1875     {EM_ALPHA,       EM_ALPHA,   ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"},
  1736     {EM_MIPS_RS3_LE, EM_MIPS_RS3_LE, ELFCLASS32, ELFDATA2LSB, (char*)"MIPSel"},
  1876     {EM_MIPS_RS3_LE, EM_MIPS_RS3_LE, ELFCLASS32, ELFDATA2LSB, (char*)"MIPSel"},
  1737     {EM_MIPS,        EM_MIPS,    ELFCLASS32, ELFDATA2MSB, (char*)"MIPS"},
  1877     {EM_MIPS,        EM_MIPS,    ELFCLASS32, ELFDATA2MSB, (char*)"MIPS"},
  1738     {EM_PARISC,      EM_PARISC,  ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"},
  1878     {EM_PARISC,      EM_PARISC,  ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"},
  1739     {EM_68K,         EM_68K,     ELFCLASS32, ELFDATA2MSB, (char*)"M68k"},
  1879     {EM_68K,         EM_68K,     ELFCLASS32, ELFDATA2MSB, (char*)"M68k"},
  1775 #else
  1915 #else
  1776     #error Method os::dll_load requires that one of following is defined:\
  1916     #error Method os::dll_load requires that one of following is defined:\
  1777         AARCH64, ALPHA, ARM, AMD64, IA32, IA64, M68K, MIPS, MIPSEL, PARISC, __powerpc__, __powerpc64__, S390, SH, __sparc
  1917         AARCH64, ALPHA, ARM, AMD64, IA32, IA64, M68K, MIPS, MIPSEL, PARISC, __powerpc__, __powerpc64__, S390, SH, __sparc
  1778 #endif
  1918 #endif
  1779 
  1919 
  1780   // Identify compatability class for VM's architecture and library's architecture
  1920   // Identify compatibility class for VM's architecture and library's architecture
  1781   // Obtain string descriptions for architectures
  1921   // Obtain string descriptions for architectures
  1782 
  1922 
  1783   arch_t lib_arch={elf_head.e_machine,0,elf_head.e_ident[EI_CLASS], elf_head.e_ident[EI_DATA], NULL};
  1923   arch_t lib_arch={elf_head.e_machine,0,elf_head.e_ident[EI_CLASS], elf_head.e_ident[EI_DATA], NULL};
  1784   int running_arch_index=-1;
  1924   int running_arch_index=-1;
  1785 
  1925 
  1799     // Even though running architecture detection failed
  1939     // Even though running architecture detection failed
  1800     // we may still continue with reporting dlerror() message
  1940     // we may still continue with reporting dlerror() message
  1801     return NULL;
  1941     return NULL;
  1802   }
  1942   }
  1803 
  1943 
  1804   if (lib_arch.endianess != arch_array[running_arch_index].endianess) {
       
  1805     ::snprintf(diag_msg_buf, diag_msg_max_length-1," (Possible cause: endianness mismatch)");
       
  1806     return NULL;
       
  1807   }
       
  1808 
       
  1809 #ifndef S390
       
  1810   if (lib_arch.elf_class != arch_array[running_arch_index].elf_class) {
       
  1811     ::snprintf(diag_msg_buf, diag_msg_max_length-1," (Possible cause: architecture word width mismatch)");
       
  1812     return NULL;
       
  1813   }
       
  1814 #endif // !S390
       
  1815 
       
  1816   if (lib_arch.compat_class != arch_array[running_arch_index].compat_class) {
  1944   if (lib_arch.compat_class != arch_array[running_arch_index].compat_class) {
  1817     if (lib_arch.name!=NULL) {
  1945     if (lib_arch.name != NULL) {
  1818       ::snprintf(diag_msg_buf, diag_msg_max_length-1,
  1946       ::snprintf(diag_msg_buf, diag_msg_max_length-1,
  1819                  " (Possible cause: can't load %s-bit .so on a %s-bit platform)",
  1947                  " (Possible cause: can't load %s .so on a %s platform)",
  1820                  lib_arch.name, arch_array[running_arch_index].name);
  1948                  lib_arch.name, arch_array[running_arch_index].name);
  1821     } else {
  1949     } else {
  1822       ::snprintf(diag_msg_buf, diag_msg_max_length-1,
  1950       ::snprintf(diag_msg_buf, diag_msg_max_length-1,
  1823                  " (Possible cause: can't load this .so (machine code=0x%x) on a %s-bit platform)",
  1951                  " (Possible cause: can't load this .so (machine code=0x%x) on a %s platform)",
  1824                  lib_arch.code,
  1952                  lib_arch.code, arch_array[running_arch_index].name);
  1825                  arch_array[running_arch_index].name);
  1953     }
  1826     }
  1954     return NULL;
       
  1955   }
       
  1956 
       
  1957   if (lib_arch.endianness != arch_array[running_arch_index].endianness) {
       
  1958     ::snprintf(diag_msg_buf, diag_msg_max_length-1, " (Possible cause: endianness mismatch)");
       
  1959     return NULL;
       
  1960   }
       
  1961 
       
  1962   // ELF file class/capacity : 0 - invalid, 1 - 32bit, 2 - 64bit
       
  1963   if (lib_arch.elf_class > 2 || lib_arch.elf_class < 1) {
       
  1964     ::snprintf(diag_msg_buf, diag_msg_max_length-1, " (Possible cause: invalid ELF file class)");
       
  1965     return NULL;
       
  1966   }
       
  1967 
       
  1968   if (lib_arch.elf_class != arch_array[running_arch_index].elf_class) {
       
  1969     ::snprintf(diag_msg_buf, diag_msg_max_length-1,
       
  1970                " (Possible cause: architecture word width mismatch, can't load %d-bit .so on a %d-bit platform)",
       
  1971                (int) lib_arch.elf_class * 32, arch_array[running_arch_index].elf_class * 32);
       
  1972     return NULL;
  1827   }
  1973   }
  1828 
  1974 
  1829   return NULL;
  1975   return NULL;
  1830 }
  1976 }
  1831 
  1977 
  1832 void * os::Linux::dlopen_helper(const char *filename, char *ebuf,
  1978 void * os::Linux::dlopen_helper(const char *filename, char *ebuf,
  1833                                 int ebuflen) {
  1979                                 int ebuflen) {
  1834   void * result = ::dlopen(filename, RTLD_LAZY);
  1980   void * result = ::dlopen(filename, RTLD_LAZY);
  1835   if (result == NULL) {
  1981   if (result == NULL) {
  1836     ::strncpy(ebuf, ::dlerror(), ebuflen - 1);
  1982     const char* error_report = ::dlerror();
  1837     ebuf[ebuflen-1] = '\0';
  1983     if (error_report == NULL) {
       
  1984       error_report = "dlerror returned no error description";
       
  1985     }
       
  1986     if (ebuf != NULL && ebuflen > 0) {
       
  1987       ::strncpy(ebuf, error_report, ebuflen-1);
       
  1988       ebuf[ebuflen-1]='\0';
       
  1989     }
       
  1990     Events::log(NULL, "Loading shared library %s failed, %s", filename, error_report);
       
  1991     log_info(os)("shared library load of %s failed, %s", filename, error_report);
       
  1992   } else {
       
  1993     Events::log(NULL, "Loaded shared library %s", filename);
       
  1994     log_info(os)("shared library load of %s was successful", filename);
  1838   }
  1995   }
  1839   return result;
  1996   return result;
  1840 }
  1997 }
  1841 
  1998 
  1842 void * os::Linux::dll_load_in_vmthread(const char *filename, char *ebuf,
  1999 void * os::Linux::dll_load_in_vmthread(const char *filename, char *ebuf,
  1875 
  2032 
  1876 void* os::get_default_process_handle() {
  2033 void* os::get_default_process_handle() {
  1877   return (void*)::dlopen(NULL, RTLD_LAZY);
  2034   return (void*)::dlopen(NULL, RTLD_LAZY);
  1878 }
  2035 }
  1879 
  2036 
  1880 static bool _print_ascii_file(const char* filename, outputStream* st) {
  2037 static bool _print_ascii_file(const char* filename, outputStream* st, const char* hdr = NULL) {
  1881   int fd = ::open(filename, O_RDONLY);
  2038   int fd = ::open(filename, O_RDONLY);
  1882   if (fd == -1) {
  2039   if (fd == -1) {
  1883     return false;
  2040     return false;
       
  2041   }
       
  2042 
       
  2043   if (hdr != NULL) {
       
  2044     st->print_cr("%s", hdr);
  1884   }
  2045   }
  1885 
  2046 
  1886   char buf[33];
  2047   char buf[33];
  1887   int bytes;
  2048   int bytes;
  1888   buf[32] = '\0';
  2049   buf[32] = '\0';
  1922       char permissions[5];
  2083       char permissions[5];
  1923       char device[6];
  2084       char device[6];
  1924       char name[PATH_MAX + 1];
  2085       char name[PATH_MAX + 1];
  1925 
  2086 
  1926       // Parse fields from line
  2087       // Parse fields from line
  1927       sscanf(line, UINT64_FORMAT_X "-" UINT64_FORMAT_X " %4s " UINT64_FORMAT_X " %5s " INT64_FORMAT " %s",
  2088       sscanf(line, UINT64_FORMAT_X "-" UINT64_FORMAT_X " %4s " UINT64_FORMAT_X " %7s " INT64_FORMAT " %s",
  1928              &base, &top, permissions, &offset, device, &inode, name);
  2089              &base, &top, permissions, &offset, device, &inode, name);
  1929 
  2090 
  1930       // Filter by device id '00:00' so that we only get file system mapped files.
  2091       // Filter by device id '00:00' so that we only get file system mapped files.
  1931       if (strcmp(device, "00:00") != 0) {
  2092       if (strcmp(device, "00:00") != 0) {
  1932 
  2093 
  1973 
  2134 
  1974   os::Linux::print_full_memory_info(st);
  2135   os::Linux::print_full_memory_info(st);
  1975 
  2136 
  1976   os::Linux::print_proc_sys_info(st);
  2137   os::Linux::print_proc_sys_info(st);
  1977 
  2138 
       
  2139   os::Linux::print_ld_preload_file(st);
       
  2140 
  1978   os::Linux::print_container_info(st);
  2141   os::Linux::print_container_info(st);
       
  2142 
       
  2143   VM_Version::print_platform_virtualization_info(st);
       
  2144 
       
  2145   os::Linux::print_steal_info(st);
  1979 }
  2146 }
  1980 
  2147 
  1981 // Try to identify popular distros.
  2148 // Try to identify popular distros.
  1982 // Most Linux distributions have a /etc/XXX-release file, which contains
  2149 // Most Linux distributions have a /etc/XXX-release file, which contains
  1983 // the OS version string. Newer Linux distributions have a /etc/lsb-release
  2150 // the OS version string. Newer Linux distributions have a /etc/lsb-release
  2131   st->print("\n/proc/meminfo:\n");
  2298   st->print("\n/proc/meminfo:\n");
  2132   _print_ascii_file("/proc/meminfo", st);
  2299   _print_ascii_file("/proc/meminfo", st);
  2133   st->cr();
  2300   st->cr();
  2134 }
  2301 }
  2135 
  2302 
       
  2303 void os::Linux::print_ld_preload_file(outputStream* st) {
       
  2304   _print_ascii_file("/etc/ld.so.preload", st, "\n/etc/ld.so.preload:");
       
  2305   st->cr();
       
  2306 }
       
  2307 
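The optional hdr argument added to _print_ascii_file above lets callers such as print_ld_preload_file emit a caption only when the file actually exists. Below is a minimal standalone sketch of that pattern, assuming plain POSIX I/O and stdout in place of HotSpot's outputStream; the helper name is hypothetical.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

// Hypothetical standalone helper mirroring the _print_ascii_file pattern.
static bool print_ascii_file(const char* filename, const char* hdr /* may be NULL */) {
  int fd = open(filename, O_RDONLY);
  if (fd == -1) {
    return false;                            // file absent; caller decides if that matters
  }
  if (hdr != NULL) {
    printf("%s\n", hdr);                     // header only once the file is known to exist
  }
  char buf[33];
  ssize_t bytes;
  while ((bytes = read(fd, buf, sizeof(buf) - 1)) > 0) {
    fwrite(buf, 1, (size_t)bytes, stdout);   // copy the file verbatim
  }
  close(fd);
  return true;
}

int main() {
  // Same call shape as print_ld_preload_file above.
  print_ascii_file("/etc/ld.so.preload", "\n/etc/ld.so.preload:");
  return 0;
}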
  2136 void os::Linux::print_container_info(outputStream* st) {
  2308 void os::Linux::print_container_info(outputStream* st) {
  2137   if (!OSContainer::is_containerized()) {
  2309   if (!OSContainer::is_containerized()) {
  2138     return;
  2310     return;
  2139   }
  2311   }
  2140 
  2312 
  2141   st->print("container (cgroup) information:\n");
  2313   st->print("container (cgroup) information:\n");
  2142 
  2314 
  2143   const char *p_ct = OSContainer::container_type();
  2315   const char *p_ct = OSContainer::container_type();
  2144   st->print("container_type: %s\n", p_ct != NULL ? p_ct : "failed");
  2316   st->print("container_type: %s\n", p_ct != NULL ? p_ct : "not supported");
  2145 
  2317 
  2146   char *p = OSContainer::cpu_cpuset_cpus();
  2318   char *p = OSContainer::cpu_cpuset_cpus();
  2147   st->print("cpu_cpuset_cpus: %s\n", p != NULL ? p : "failed");
  2319   st->print("cpu_cpuset_cpus: %s\n", p != NULL ? p : "not supported");
  2148   free(p);
  2320   free(p);
  2149 
  2321 
  2150   p = OSContainer::cpu_cpuset_memory_nodes();
  2322   p = OSContainer::cpu_cpuset_memory_nodes();
  2151   st->print("cpu_memory_nodes: %s\n", p != NULL ? p : "failed");
  2323   st->print("cpu_memory_nodes: %s\n", p != NULL ? p : "not supported");
  2152   free(p);
  2324   free(p);
  2153 
  2325 
  2154   int i = OSContainer::active_processor_count();
  2326   int i = OSContainer::active_processor_count();
       
  2327   st->print("active_processor_count: ");
  2155   if (i > 0) {
  2328   if (i > 0) {
  2156     st->print("active_processor_count: %d\n", i);
  2329     st->print("%d\n", i);
  2157   } else {
  2330   } else {
  2158     st->print("active_processor_count: failed\n");
  2331     st->print("not supported\n");
  2159   }
  2332   }
  2160 
  2333 
  2161   i = OSContainer::cpu_quota();
  2334   i = OSContainer::cpu_quota();
  2162   st->print("cpu_quota: %d\n", i);
  2335   st->print("cpu_quota: ");
       
  2336   if (i > 0) {
       
  2337     st->print("%d\n", i);
       
  2338   } else {
       
  2339     st->print("%s\n", i == OSCONTAINER_ERROR ? "not supported" : "no quota");
       
  2340   }
  2163 
  2341 
  2164   i = OSContainer::cpu_period();
  2342   i = OSContainer::cpu_period();
  2165   st->print("cpu_period: %d\n", i);
  2343   st->print("cpu_period: ");
       
  2344   if (i > 0) {
       
  2345     st->print("%d\n", i);
       
  2346   } else {
       
  2347     st->print("%s\n", i == OSCONTAINER_ERROR ? "not supported" : "no period");
       
  2348   }
  2166 
  2349 
  2167   i = OSContainer::cpu_shares();
  2350   i = OSContainer::cpu_shares();
  2168   st->print("cpu_shares: %d\n", i);
  2351   st->print("cpu_shares: ");
       
  2352   if (i > 0) {
       
  2353     st->print("%d\n", i);
       
  2354   } else {
       
  2355     st->print("%s\n", i == OSCONTAINER_ERROR ? "not supported" : "no shares");
       
  2356   }
  2169 
  2357 
  2170   jlong j = OSContainer::memory_limit_in_bytes();
  2358   jlong j = OSContainer::memory_limit_in_bytes();
  2171   st->print("memory_limit_in_bytes: " JLONG_FORMAT "\n", j);
  2359   st->print("memory_limit_in_bytes: ");
       
  2360   if (j > 0) {
       
  2361     st->print(JLONG_FORMAT "\n", j);
       
  2362   } else {
       
  2363     st->print("%s\n", j == OSCONTAINER_ERROR ? "not supported" : "unlimited");
       
  2364   }
  2172 
  2365 
  2173   j = OSContainer::memory_and_swap_limit_in_bytes();
  2366   j = OSContainer::memory_and_swap_limit_in_bytes();
  2174   st->print("memory_and_swap_limit_in_bytes: " JLONG_FORMAT "\n", j);
  2367   st->print("memory_and_swap_limit_in_bytes: ");
       
  2368   if (j > 0) {
       
  2369     st->print(JLONG_FORMAT "\n", j);
       
  2370   } else {
       
  2371     st->print("%s\n", j == OSCONTAINER_ERROR ? "not supported" : "unlimited");
       
  2372   }
  2175 
  2373 
  2176   j = OSContainer::memory_soft_limit_in_bytes();
  2374   j = OSContainer::memory_soft_limit_in_bytes();
  2177   st->print("memory_soft_limit_in_bytes: " JLONG_FORMAT "\n", j);
  2375   st->print("memory_soft_limit_in_bytes: ");
       
  2376   if (j > 0) {
       
  2377     st->print(JLONG_FORMAT "\n", j);
       
  2378   } else {
       
  2379     st->print("%s\n", j == OSCONTAINER_ERROR ? "not supported" : "unlimited");
       
  2380   }
  2178 
  2381 
  2179   j = OSContainer::OSContainer::memory_usage_in_bytes();
  2382   j = OSContainer::OSContainer::memory_usage_in_bytes();
  2180   st->print("memory_usage_in_bytes: " JLONG_FORMAT "\n", j);
  2383   st->print("memory_usage_in_bytes: ");
       
  2384   if (j > 0) {
       
  2385     st->print(JLONG_FORMAT "\n", j);
       
  2386   } else {
       
  2387     st->print("%s\n", j == OSCONTAINER_ERROR ? "not supported" : "unlimited");
       
  2388   }
  2181 
  2389 
  2182   j = OSContainer::OSContainer::memory_max_usage_in_bytes();
  2390   j = OSContainer::OSContainer::memory_max_usage_in_bytes();
  2183   st->print("memory_max_usage_in_bytes: " JLONG_FORMAT "\n", j);
  2391   st->print("memory_max_usage_in_bytes: ");
       
  2392   if (j > 0) {
       
  2393     st->print(JLONG_FORMAT "\n", j);
       
  2394   } else {
       
  2395     st->print("%s\n", j == OSCONTAINER_ERROR ? "not supported" : "unlimited");
       
  2396   }
  2184   st->cr();
  2397   st->cr();
       
  2398 }
       
  2399 
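The printing convention above distinguishes a metric the container subsystem could not report (OSCONTAINER_ERROR, printed as "not supported") from one that is merely unset (printed as "no quota", "unlimited", and so on). A small sketch of that convention follows, with a hypothetical print_metric helper; the OSCONTAINER_ERROR value used here is an assumption standing in for the constant defined in the real headers.

#include <stdio.h>

// Assumption: -2 stands in for the OSCONTAINER_ERROR constant from the real headers.
static const long OSCONTAINER_ERROR = -2;

// Hypothetical helper showing the reporting convention only.
static void print_metric(const char* name, long value, const char* absent_msg) {
  printf("%s: ", name);
  if (value > 0) {
    printf("%ld\n", value);            // a real, positive limit
  } else if (value == OSCONTAINER_ERROR) {
    printf("not supported\n");         // the cgroup controller could not be read
  } else {
    printf("%s\n", absent_msg);        // e.g. "no quota", "unlimited"
  }
}

int main() {
  print_metric("cpu_quota", -1, "no quota");
  print_metric("memory_limit_in_bytes", OSCONTAINER_ERROR, "unlimited");
  return 0;
}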
       
  2400 void os::Linux::print_steal_info(outputStream* st) {
       
  2401   if (has_initial_tick_info) {
       
  2402     CPUPerfTicks pticks;
       
  2403     bool res = os::Linux::get_tick_information(&pticks, -1);
       
  2404 
       
  2405     if (res && pticks.has_steal_ticks) {
       
  2406       uint64_t steal_ticks_difference = pticks.steal - initial_steal_ticks;
       
  2407       uint64_t total_ticks_difference = pticks.total - initial_total_ticks;
       
  2408       double steal_ticks_perc = 0.0;
       
  2409       if (total_ticks_difference != 0) {
       
  2410         steal_ticks_perc = (double) steal_ticks_difference / total_ticks_difference;
       
  2411       }
       
  2412       st->print_cr("Steal ticks since vm start: " UINT64_FORMAT, steal_ticks_difference);
       
  2413       st->print_cr("Steal ticks percentage since vm start:%7.3f", steal_ticks_perc);
       
  2414     }
       
  2415   }
  2185 }
  2416 }
  2186 
  2417 
  2187 void os::print_memory_info(outputStream* st) {
  2418 void os::print_memory_info(outputStream* st) {
  2188 
  2419 
  2189   st->print("Memory:");
  2420   st->print("Memory:");
  2257 #elif defined(M68K)
  2488 #elif defined(M68K)
  2258 const char* search_string = "CPU";
  2489 const char* search_string = "CPU";
  2259 #elif defined(PPC64)
  2490 #elif defined(PPC64)
  2260 const char* search_string = "cpu";
  2491 const char* search_string = "cpu";
  2261 #elif defined(S390)
  2492 #elif defined(S390)
  2262 const char* search_string = "processor";
  2493 const char* search_string = "machine =";
  2263 #elif defined(SPARC)
  2494 #elif defined(SPARC)
  2264 const char* search_string = "cpu";
  2495 const char* search_string = "cpu";
  2265 #else
  2496 #else
  2266 const char* search_string = "Processor";
  2497 const char* search_string = "Processor";
  2267 #endif
  2498 #endif
  2443 }
  2674 }
  2444 
  2675 
  2445 ////////////////////////////////////////////////////////////////////////////////
  2676 ////////////////////////////////////////////////////////////////////////////////
  2446 // sun.misc.Signal support
  2677 // sun.misc.Signal support
  2447 
  2678 
  2448 static volatile jint sigint_count = 0;
       
  2449 
       
  2450 static void UserHandler(int sig, void *siginfo, void *context) {
  2679 static void UserHandler(int sig, void *siginfo, void *context) {
  2451   // 4511530 - sem_post is serialized and handled by the manager thread. When
       
  2452   // the program is interrupted by Ctrl-C, SIGINT is sent to every thread. We
       
  2453   // don't want to flood the manager thread with sem_post requests.
       
  2454   if (sig == SIGINT && Atomic::add(1, &sigint_count) > 1) {
       
  2455     return;
       
  2456   }
       
  2457 
       
  2458   // Ctrl-C is pressed during error reporting, likely because the error
  2680   // Ctrl-C is pressed during error reporting, likely because the error
  2459   // handler fails to abort. Let VM die immediately.
  2681   // handler fails to abort. Let VM die immediately.
  2460   if (sig == SIGINT && VMError::is_error_reported()) {
  2682   if (sig == SIGINT && VMError::is_error_reported()) {
  2461     os::die();
  2683     os::die();
  2462   }
  2684   }
  2464   os::signal_notify(sig);
  2686   os::signal_notify(sig);
  2465 }
  2687 }
  2466 
  2688 
  2467 void* os::user_handler() {
  2689 void* os::user_handler() {
  2468   return CAST_FROM_FN_PTR(void*, UserHandler);
  2690   return CAST_FROM_FN_PTR(void*, UserHandler);
  2469 }
       
  2470 
       
  2471 static struct timespec create_semaphore_timespec(unsigned int sec, int nsec) {
       
  2472   struct timespec ts;
       
  2473   // Semaphores are always associated with CLOCK_REALTIME
       
  2474   os::Linux::clock_gettime(CLOCK_REALTIME, &ts);
       
  2475   // see os_posix.cpp for discussion on overflow checking
       
  2476   if (sec >= MAX_SECS) {
       
  2477     ts.tv_sec += MAX_SECS;
       
  2478     ts.tv_nsec = 0;
       
  2479   } else {
       
  2480     ts.tv_sec += sec;
       
  2481     ts.tv_nsec += nsec;
       
  2482     if (ts.tv_nsec >= NANOSECS_PER_SEC) {
       
  2483       ts.tv_nsec -= NANOSECS_PER_SEC;
       
  2484       ++ts.tv_sec; // note: this must be <= max_secs
       
  2485     }
       
  2486   }
       
  2487 
       
  2488   return ts;
       
  2489 }
  2691 }
  2490 
  2692 
  2491 extern "C" {
  2693 extern "C" {
  2492   typedef void (*sa_handler_t)(int);
  2694   typedef void (*sa_handler_t)(int);
  2493   typedef void (*sa_sigaction_t)(int, siginfo_t *, void *);
  2695   typedef void (*sa_sigaction_t)(int, siginfo_t *, void *);
  2545     assert(ReduceSignalUsage, "signal semaphore should be created");
  2747     assert(ReduceSignalUsage, "signal semaphore should be created");
  2546   }
  2748   }
  2547 }
  2749 }
  2548 
  2750 
  2549 static int check_pending_signals() {
  2751 static int check_pending_signals() {
  2550   Atomic::store(0, &sigint_count);
       
  2551   for (;;) {
  2752   for (;;) {
  2552     for (int i = 0; i < NSIG + 1; i++) {
  2753     for (int i = 0; i < NSIG + 1; i++) {
  2553       jint n = pending_signals[i];
  2754       jint n = pending_signals[i];
  2554       if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
  2755       if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
  2555         return i;
  2756         return i;
  2811   int highest_node_number = Linux::numa_max_node();
  3012   int highest_node_number = Linux::numa_max_node();
  2812   int num_nodes = 0;
  3013   int num_nodes = 0;
  2813 
  3014 
  2814   // Get the total number of nodes in the system including nodes without memory.
  3015   // Get the total number of nodes in the system including nodes without memory.
  2815   for (node = 0; node <= highest_node_number; node++) {
  3016   for (node = 0; node <= highest_node_number; node++) {
  2816     if (isnode_in_existing_nodes(node)) {
  3017     if (is_node_in_existing_nodes(node)) {
  2817       num_nodes++;
  3018       num_nodes++;
  2818     }
  3019     }
  2819   }
  3020   }
  2820   return num_nodes;
  3021   return num_nodes;
  2821 }
  3022 }
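get_existing_num_nodes above walks node ids from 0 to numa_max_node() and counts the ones present in the system, including memory-less nodes. The sketch below runs the same loop against libnuma v2 directly (compile with -lnuma); it consults the publicly declared numa_all_nodes_ptr mask, whereas HotSpot additionally dlsym's the numa_nodes_ptr symbol so that memory-less nodes are counted too.

#include <numa.h>
#include <stdio.h>

int main() {
  if (numa_available() == -1) {
    return 1;                                     // libnuma / kernel NUMA not usable
  }
  int num_nodes = 0;
  for (int node = 0; node <= numa_max_node(); node++) {
    // Count every node id that is set in the library's node mask.
    if (numa_bitmask_isbitset(numa_all_nodes_ptr, node)) {
      num_nodes++;
    }
  }
  printf("NUMA nodes: %d\n", num_nodes);
  return 0;
}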
  2827   // Map all node ids in which it is possible to allocate memory. Also nodes are
  3028   // Map all node ids in which it is possible to allocate memory. Also nodes are
  2828   // not always consecutively available, i.e. available from 0 to the highest
  3029   // not always consecutively available, i.e. available from 0 to the highest
  2829   // node number. If the nodes have been bound explicitly using numactl membind,
  3030   // node number. If the nodes have been bound explicitly using numactl membind,
  2830   // then allocate memory from those nodes only.
  3031   // then allocate memory from those nodes only.
  2831   for (int node = 0; node <= highest_node_number; node++) {
  3032   for (int node = 0; node <= highest_node_number; node++) {
  2832     if (Linux::isnode_in_bound_nodes((unsigned int)node)) {
  3033     if (Linux::is_node_in_bound_nodes((unsigned int)node)) {
  2833       ids[i++] = node;
  3034       ids[i++] = node;
  2834     }
  3035     }
  2835   }
  3036   }
  2836   return i;
  3037   return i;
  2837 }
  3038 }
  2930                                                libnuma_dlsym(handle, "numa_bitmask_isbitset")));
  3131                                                libnuma_dlsym(handle, "numa_bitmask_isbitset")));
  2931       set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
  3132       set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
  2932                                        libnuma_dlsym(handle, "numa_distance")));
  3133                                        libnuma_dlsym(handle, "numa_distance")));
  2933       set_numa_get_membind(CAST_TO_FN_PTR(numa_get_membind_func_t,
  3134       set_numa_get_membind(CAST_TO_FN_PTR(numa_get_membind_func_t,
  2934                                           libnuma_v2_dlsym(handle, "numa_get_membind")));
  3135                                           libnuma_v2_dlsym(handle, "numa_get_membind")));
       
  3136       set_numa_get_interleave_mask(CAST_TO_FN_PTR(numa_get_interleave_mask_func_t,
       
  3137                                                   libnuma_v2_dlsym(handle, "numa_get_interleave_mask")));
  2935 
  3138 
  2936       if (numa_available() != -1) {
  3139       if (numa_available() != -1) {
  2937         set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
  3140         set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
  2938         set_numa_all_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_nodes_ptr"));
  3141         set_numa_all_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_nodes_ptr"));
  2939         set_numa_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_nodes_ptr"));
  3142         set_numa_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_nodes_ptr"));
       
  3143         set_numa_interleave_bitmask(_numa_get_interleave_mask());
       
  3144         set_numa_membind_bitmask(_numa_get_membind());
  2940         // Create an index -> node mapping, since nodes are not always consecutive
  3145         // Create an index -> node mapping, since nodes are not always consecutive
  2941         _nindex_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true);
  3146         _nindex_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true);
  2942         rebuild_nindex_to_node_map();
  3147         rebuild_nindex_to_node_map();
  2943         // Create a cpu -> node mapping
  3148         // Create a cpu -> node mapping
  2944         _cpu_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true);
  3149         _cpu_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true);
  2960 void os::Linux::rebuild_nindex_to_node_map() {
  3165 void os::Linux::rebuild_nindex_to_node_map() {
  2961   int highest_node_number = Linux::numa_max_node();
  3166   int highest_node_number = Linux::numa_max_node();
  2962 
  3167 
  2963   nindex_to_node()->clear();
  3168   nindex_to_node()->clear();
  2964   for (int node = 0; node <= highest_node_number; node++) {
  3169   for (int node = 0; node <= highest_node_number; node++) {
  2965     if (Linux::isnode_in_existing_nodes(node)) {
  3170     if (Linux::is_node_in_existing_nodes(node)) {
  2966       nindex_to_node()->append(node);
  3171       nindex_to_node()->append(node);
  2967     }
  3172     }
  2968   }
  3173   }
  2969 }
  3174 }
  2970 
  3175 
  2997   for (size_t i = 0; i < node_num; i++) {
  3202   for (size_t i = 0; i < node_num; i++) {
  2998     // Check if node is configured (not a memory-less node). If it is not, find
  3203     // Check if node is configured (not a memory-less node). If it is not, find
  2999     // the closest configured node. Check also if node is bound, i.e. it's allowed
  3204     // the closest configured node. Check also if node is bound, i.e. it's allowed
  3000     // to allocate memory from the node. If it's not allowed, map cpus in that node
  3205     // to allocate memory from the node. If it's not allowed, map cpus in that node
  3001     // to the closest node from which memory allocation is allowed.
  3206     // to the closest node from which memory allocation is allowed.
  3002     if (!isnode_in_configured_nodes(nindex_to_node()->at(i)) ||
  3207     if (!is_node_in_configured_nodes(nindex_to_node()->at(i)) ||
  3003         !isnode_in_bound_nodes(nindex_to_node()->at(i))) {
  3208         !is_node_in_bound_nodes(nindex_to_node()->at(i))) {
  3004       closest_distance = INT_MAX;
  3209       closest_distance = INT_MAX;
  3005       // Check distance from all remaining nodes in the system. Ignore distance
  3210       // Check distance from all remaining nodes in the system. Ignore distance
  3006       // from itself, from another non-configured node, and from another non-bound
  3211       // from itself, from another non-configured node, and from another non-bound
  3007       // node.
  3212       // node.
  3008       for (size_t m = 0; m < node_num; m++) {
  3213       for (size_t m = 0; m < node_num; m++) {
  3009         if (m != i &&
  3214         if (m != i &&
  3010             isnode_in_configured_nodes(nindex_to_node()->at(m)) &&
  3215             is_node_in_configured_nodes(nindex_to_node()->at(m)) &&
  3011             isnode_in_bound_nodes(nindex_to_node()->at(m))) {
  3216             is_node_in_bound_nodes(nindex_to_node()->at(m))) {
  3012           distance = numa_distance(nindex_to_node()->at(i), nindex_to_node()->at(m));
  3217           distance = numa_distance(nindex_to_node()->at(i), nindex_to_node()->at(m));
  3013           // If a closest node is found, update. There is always at least one
  3218           // If a closest node is found, update. There is always at least one
  3014           // configured and bound node in the system so there is always at least
  3219           // configured and bound node in the system so there is always at least
  3015           // one node close.
  3220           // one node close.
  3016           if (distance != 0 && distance < closest_distance) {
  3221           if (distance != 0 && distance < closest_distance) {
  3061 os::Linux::numa_interleave_memory_v2_func_t os::Linux::_numa_interleave_memory_v2;
  3266 os::Linux::numa_interleave_memory_v2_func_t os::Linux::_numa_interleave_memory_v2;
  3062 os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
  3267 os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
  3063 os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
  3268 os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
  3064 os::Linux::numa_distance_func_t os::Linux::_numa_distance;
  3269 os::Linux::numa_distance_func_t os::Linux::_numa_distance;
  3065 os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
  3270 os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
       
  3271 os::Linux::numa_get_interleave_mask_func_t os::Linux::_numa_get_interleave_mask;
       
  3272 os::Linux::NumaAllocationPolicy os::Linux::_current_numa_policy;
  3066 unsigned long* os::Linux::_numa_all_nodes;
  3273 unsigned long* os::Linux::_numa_all_nodes;
  3067 struct bitmask* os::Linux::_numa_all_nodes_ptr;
  3274 struct bitmask* os::Linux::_numa_all_nodes_ptr;
  3068 struct bitmask* os::Linux::_numa_nodes_ptr;
  3275 struct bitmask* os::Linux::_numa_nodes_ptr;
       
  3276 struct bitmask* os::Linux::_numa_interleave_bitmask;
       
  3277 struct bitmask* os::Linux::_numa_membind_bitmask;
  3069 
  3278 
  3070 bool os::pd_uncommit_memory(char* addr, size_t size) {
  3279 bool os::pd_uncommit_memory(char* addr, size_t size) {
  3071   uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE,
  3280   uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE,
  3072                                      MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE|MAP_ANONYMOUS, -1, 0);
  3281                                      MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE|MAP_ANONYMOUS, -1, 0);
  3073   return res  != (uintptr_t) MAP_FAILED;
  3282   return res  != (uintptr_t) MAP_FAILED;
  3345   // protection of malloc'ed or statically allocated memory). Check the
  3554   // protection of malloc'ed or statically allocated memory). Check the
  3346   // caller if you hit this assert.
  3555   // caller if you hit this assert.
  3347   assert(addr == bottom, "sanity check");
  3556   assert(addr == bottom, "sanity check");
  3348 
  3557 
  3349   size = align_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
  3558   size = align_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
       
  3559   Events::log(NULL, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with protection modes %x", p2i(bottom), p2i(bottom+size), prot);
  3350   return ::mprotect(bottom, size, prot) == 0;
  3560   return ::mprotect(bottom, size, prot) == 0;
  3351 }
  3561 }
  3352 
  3562 
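The mprotect-based helper above grows the requested length to whole pages (and, after this change, records the call via Events::log) because mprotect(2) only accepts page-aligned ranges. A standalone sketch of that alignment, using sysconf(_SC_PAGESIZE) in place of os::Linux::page_size(); unlike the original, which asserts that the caller already passed a page-aligned bottom, this sketch also rounds the start down as a simplification.

#include <sys/mman.h>
#include <unistd.h>
#include <stdint.h>

static bool protect_range(char* addr, size_t size, int prot) {
  uintptr_t page = (uintptr_t)sysconf(_SC_PAGESIZE);
  // Round the start down to a page boundary and the length up to whole pages.
  char*  bottom = (char*)((uintptr_t)addr & ~(page - 1));
  size_t length = (size_t)(((uintptr_t)(addr - bottom) + size + page - 1) & ~(page - 1));
  return mprotect(bottom, length, prot) == 0;
}

int main() {
  void* p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) return 1;
  // Protect a few bytes in the middle; the whole containing page becomes read-only.
  return protect_range((char*)p + 100, 8, PROT_READ) ? 0 : 1;
}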
  3353 // Set protections specified
  3563 // Set protections specified
  3354 bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
  3564 bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
  3428   }
  3638   }
  3429 
  3639 
  3430   return result;
  3640   return result;
  3431 }
  3641 }
  3432 
  3642 
  3433 // Set the coredump_filter bits to include largepages in core dump (bit 6)
       
  3434 //
       
  3435 // From the coredump_filter documentation:
  3643 // From the coredump_filter documentation:
  3436 //
  3644 //
  3437 // - (bit 0) anonymous private memory
  3645 // - (bit 0) anonymous private memory
  3438 // - (bit 1) anonymous shared memory
  3646 // - (bit 1) anonymous shared memory
  3439 // - (bit 2) file-backed private memory
  3647 // - (bit 2) file-backed private memory
  3443 // - (bit 5) hugetlb private memory
  3651 // - (bit 5) hugetlb private memory
  3444 // - (bit 6) hugetlb shared memory
  3652 // - (bit 6) hugetlb shared memory
  3445 // - (bit 7) dax private memory
  3653 // - (bit 7) dax private memory
  3446 // - (bit 8) dax shared memory
  3654 // - (bit 8) dax shared memory
  3447 //
  3655 //
  3448 static void set_coredump_filter(bool largepages, bool dax_shared) {
  3656 static void set_coredump_filter(CoredumpFilterBit bit) {
  3449   FILE *f;
  3657   FILE *f;
  3450   long cdm;
  3658   long cdm;
  3451   bool filter_changed = false;
       
  3452 
  3659 
  3453   if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) {
  3660   if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) {
  3454     return;
  3661     return;
  3455   }
  3662   }
  3456 
  3663 
  3457   if (fscanf(f, "%lx", &cdm) != 1) {
  3664   if (fscanf(f, "%lx", &cdm) != 1) {
  3458     fclose(f);
  3665     fclose(f);
  3459     return;
  3666     return;
  3460   }
  3667   }
  3461 
  3668 
       
  3669   long saved_cdm = cdm;
  3462   rewind(f);
  3670   rewind(f);
  3463 
  3671   cdm |= bit;
  3464   if (largepages && (cdm & LARGEPAGES_BIT) == 0) {
  3672 
  3465     cdm |= LARGEPAGES_BIT;
  3673   if (cdm != saved_cdm) {
  3466     filter_changed = true;
       
  3467   }
       
  3468   if (dax_shared && (cdm & DAX_SHARED_BIT) == 0) {
       
  3469     cdm |= DAX_SHARED_BIT;
       
  3470     filter_changed = true;
       
  3471   }
       
  3472   if (filter_changed) {
       
  3473     fprintf(f, "%#lx", cdm);
  3674     fprintf(f, "%#lx", cdm);
  3474   }
  3675   }
  3475 
  3676 
  3476   fclose(f);
  3677   fclose(f);
  3477 }
  3678 }
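set_coredump_filter now takes a single CoredumpFilterBit and ORs it into the mask read from /proc/self/coredump_filter, writing the file back only when the mask actually changed. A standalone sketch of that read-modify-write; the bit used in main is bit 6 (hugetlb shared memory) from the list above.

#include <stdio.h>

// Standalone version of the read-modify-write shown above.
static void set_coredump_filter_bit(unsigned long bit) {
  FILE* f = fopen("/proc/self/coredump_filter", "r+");
  if (f == NULL) {
    return;                              // feature unavailable; not an error
  }
  unsigned long cdm;
  if (fscanf(f, "%lx", &cdm) != 1) {
    fclose(f);
    return;
  }
  unsigned long saved_cdm = cdm;
  rewind(f);
  cdm |= bit;
  if (cdm != saved_cdm) {
    fprintf(f, "%#lx", cdm);             // kernel parses the new mask on write
  }
  fclose(f);
}

int main() {
  set_coredump_filter_bit(1UL << 6);     // bit 6: hugetlb shared memory (see list above)
  return 0;
}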
  3606   }
  3807   }
  3607 
  3808 
  3608   size_t large_page_size = Linux::setup_large_page_size();
  3809   size_t large_page_size = Linux::setup_large_page_size();
  3609   UseLargePages          = Linux::setup_large_page_type(large_page_size);
  3810   UseLargePages          = Linux::setup_large_page_type(large_page_size);
  3610 
  3811 
  3611   set_coredump_filter(true /*largepages*/, false /*dax_shared*/);
  3812   set_coredump_filter(LARGEPAGES_BIT);
  3612 }
  3813 }
  3613 
  3814 
  3614 #ifndef SHM_HUGETLB
  3815 #ifndef SHM_HUGETLB
  3615   #define SHM_HUGETLB 04000
  3816   #define SHM_HUGETLB 04000
  3616 #endif
  3817 #endif
  3992 
  4193 
  3993 // Reserve memory at an arbitrary address, only if that area is
  4194 // Reserve memory at an arbitrary address, only if that area is
  3994 // available (and not reserved for something else).
  4195 // available (and not reserved for something else).
  3995 
  4196 
  3996 char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
  4197 char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
  3997   const int max_tries = 10;
       
  3998   char* base[max_tries];
       
  3999   size_t size[max_tries];
       
  4000   const size_t gap = 0x000000;
       
  4001 
       
  4002   // Assert only that the size is a multiple of the page size, since
  4198   // Assert only that the size is a multiple of the page size, since
  4003   // that's all that mmap requires, and since that's all we really know
  4199   // that's all that mmap requires, and since that's all we really know
  4004   // about at this low abstraction level.  If we need higher alignment,
  4200   // about at this low abstraction level.  If we need higher alignment,
  4005   // we can either pass an alignment to this method or verify alignment
  4201   // we can either pass an alignment to this method or verify alignment
  4006   // in one of the methods further up the call chain.  See bug 5044738.
  4202   // in one of the methods further up the call chain.  See bug 5044738.
  4019   if (addr != NULL) {
  4215   if (addr != NULL) {
  4020     // mmap() succeeded, but not at the requested address; release the mapping
  4216     // mmap() succeeded, but not at the requested address; release the mapping
  4021     anon_munmap(addr, bytes);
  4217     anon_munmap(addr, bytes);
  4022   }
  4218   }
  4023 
  4219 
  4024   int i;
  4220   return NULL;
  4025   for (i = 0; i < max_tries; ++i) {
       
  4026     base[i] = reserve_memory(bytes);
       
  4027 
       
  4028     if (base[i] != NULL) {
       
  4029       // Is this the block we wanted?
       
  4030       if (base[i] == requested_addr) {
       
  4031         size[i] = bytes;
       
  4032         break;
       
  4033       }
       
  4034 
       
  4035       // Does this overlap the block we wanted? Give back the overlapped
       
  4036       // parts and try again.
       
  4037 
       
  4038       ptrdiff_t top_overlap = requested_addr + (bytes + gap) - base[i];
       
  4039       if (top_overlap >= 0 && (size_t)top_overlap < bytes) {
       
  4040         unmap_memory(base[i], top_overlap);
       
  4041         base[i] += top_overlap;
       
  4042         size[i] = bytes - top_overlap;
       
  4043       } else {
       
  4044         ptrdiff_t bottom_overlap = base[i] + bytes - requested_addr;
       
  4045         if (bottom_overlap >= 0 && (size_t)bottom_overlap < bytes) {
       
  4046           unmap_memory(requested_addr, bottom_overlap);
       
  4047           size[i] = bytes - bottom_overlap;
       
  4048         } else {
       
  4049           size[i] = bytes;
       
  4050         }
       
  4051       }
       
  4052     }
       
  4053   }
       
  4054 
       
  4055   // Give back the unused reserved pieces.
       
  4056 
       
  4057   for (int j = 0; j < i; ++j) {
       
  4058     if (base[j] != NULL) {
       
  4059       unmap_memory(base[j], size[j]);
       
  4060     }
       
  4061   }
       
  4062 
       
  4063   if (i < max_tries) {
       
  4064     return requested_addr;
       
  4065   } else {
       
  4066     return NULL;
       
  4067   }
       
  4068 }
       
  4069 
       
  4070 size_t os::read(int fd, void *buf, unsigned int nBytes) {
       
  4071   return ::read(fd, buf, nBytes);
       
  4072 }
       
  4073 
       
  4074 size_t os::read_at(int fd, void *buf, unsigned int nBytes, jlong offset) {
       
  4075   return ::pread(fd, buf, nBytes, offset);
       
  4076 }
       
  4077 
       
  4078 // Short sleep, direct OS call.
       
  4079 //
       
  4080 // Note: certain versions of Linux CFS scheduler (since 2.6.23) do not guarantee
       
  4081 // sched_yield(2) will actually give up the CPU:
       
  4082 //
       
  4083 //   * Alone on this particular CPU, keeps running.
       
  4084 //   * Before the introduction of "skip_buddy" with "compat_yield" disabled
       
  4085 //     (pre 2.6.39).
       
  4086 //
       
  4087 // So calling this with 0 is an alternative.
       
  4088 //
       
  4089 void os::naked_short_sleep(jlong ms) {
       
  4090   struct timespec req;
       
  4091 
       
  4092   assert(ms < 1000, "Un-interruptable sleep, short time use only");
       
  4093   req.tv_sec = 0;
       
  4094   if (ms > 0) {
       
  4095     req.tv_nsec = (ms % 1000) * 1000000;
       
  4096   } else {
       
  4097     req.tv_nsec = 1;
       
  4098   }
       
  4099 
       
  4100   nanosleep(&req, NULL);
       
  4101 
       
  4102   return;
       
  4103 }
  4221 }
  4104 
  4222 
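The rewritten pd_attempt_reserve_memory_at above simply maps at the requested address without MAP_FIXED and gives the memory back if the kernel placed it elsewhere; the old retry-and-trim loop is gone. A simplified standalone sketch of that probe follows, with direct mmap/munmap standing in for anon_mmap/anon_munmap; the flag set and the example address are assumptions for illustration only.

#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>

static char* attempt_reserve_at(char* requested_addr, size_t bytes) {
  void* addr = mmap(requested_addr, bytes, PROT_NONE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (addr == MAP_FAILED) {
    return NULL;
  }
  if (addr == (void*)requested_addr) {
    return (char*)addr;                  // got exactly the range we asked for
  }
  munmap(addr, bytes);                   // kernel placed it elsewhere: give it back
  return NULL;
}

int main() {
  // Arbitrary example address in the user-space range of a 64-bit process.
  char* p = attempt_reserve_at((char*)(uintptr_t)0x600000000000ULL, 1024 * 1024);
  printf("reserved at requested address: %s\n", p != NULL ? "yes" : "no");
  return 0;
}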
  4105 // Sleep forever; naked call to OS-specific sleep; use with CAUTION
  4223 // Sleep forever; naked call to OS-specific sleep; use with CAUTION
  4106 void os::infinite_sleep() {
  4224 void os::infinite_sleep() {
  4107   while (true) {    // sleep forever ...
  4225   while (true) {    // sleep forever ...
  4112 // Used to convert frequent JVM_Yield() to nops
  4230 // Used to convert frequent JVM_Yield() to nops
  4113 bool os::dont_yield() {
  4231 bool os::dont_yield() {
  4114   return DontYieldALot;
  4232   return DontYieldALot;
  4115 }
  4233 }
  4116 
  4234 
       
  4235 // Linux CFS scheduler (since 2.6.23) does not guarantee sched_yield(2) will
       
  4236 // actually give up the CPU. Since skip buddy (v2.6.28):
       
  4237 //
       
  4238 // * Sets the yielding task as skip buddy for current CPU's run queue.
       
  4239 // * Picks next from run queue, if empty, picks a skip buddy (can be the yielding task).
       
  4240 // * Clears skip buddies for this run queue (yielding task no longer a skip buddy).
       
  4241 //
       
  4242 // An alternative is calling os::naked_short_nanosleep with a small number to avoid
       
  4243 // getting re-scheduled immediately.
       
  4244 //
  4117 void os::naked_yield() {
  4245 void os::naked_yield() {
  4118   sched_yield();
  4246   sched_yield();
  4119 }
  4247 }
  4120 
  4248 
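The new comment above notes that calling os::naked_short_nanosleep with a tiny interval is an alternative to sched_yield() when CFS would otherwise reschedule the yielding task immediately. The sketch below shows only that basic idea, not HotSpot's actual implementation.

#include <time.h>

// Minimal sketch: "yield" by sleeping for a very small interval.
static void short_nanosleep(long ns) {
  struct timespec req;
  req.tv_sec  = 0;
  req.tv_nsec = ns;
  nanosleep(&req, NULL);   // EINTR only shortens the pause, acceptable for a yield
}

int main() {
  short_nanosleep(1);      // one nanosecond is enough to force a reschedule
  return 0;
}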
  4121 ////////////////////////////////////////////////////////////////////////////////
  4249 ////////////////////////////////////////////////////////////////////////////////
  4131 // The following code actually changes the niceness of kernel-thread/LWP. It
  4259 // The following code actually changes the niceness of kernel-thread/LWP. It
  4132 // has an assumption that setpriority() only modifies one kernel-thread/LWP,
  4260 // has an assumption that setpriority() only modifies one kernel-thread/LWP,
  4133 // not the entire user process, and user level threads are 1:1 mapped to kernel
  4261 // not the entire user process, and user level threads are 1:1 mapped to kernel
  4134 // threads. It has always been the case, but could change in the future. For
  4262 // threads. It has always been the case, but could change in the future. For
  4135 // this reason, the code should not be used as default (ThreadPriorityPolicy=0).
  4263 // this reason, the code should not be used as default (ThreadPriorityPolicy=0).
  4136 // It is only used when ThreadPriorityPolicy=1 and requires root privilege.
  4264 // It is only used when ThreadPriorityPolicy=1 and may require system level permission
       
  4265 // (e.g., root privilege or CAP_SYS_NICE capability).
  4137 
  4266 
  4138 int os::java_to_os_priority[CriticalPriority + 1] = {
  4267 int os::java_to_os_priority[CriticalPriority + 1] = {
  4139   19,              // 0 Entry should never be used
  4268   19,              // 0 Entry should never be used
  4140 
  4269 
  4141    4,              // 1 MinPriority
  4270    4,              // 1 MinPriority
  4155   -5               // 11 CriticalPriority
  4284   -5               // 11 CriticalPriority
  4156 };
  4285 };
  4157 
  4286 
  4158 static int prio_init() {
  4287 static int prio_init() {
  4159   if (ThreadPriorityPolicy == 1) {
  4288   if (ThreadPriorityPolicy == 1) {
  4160     // Only root can raise thread priority. Don't allow ThreadPriorityPolicy=1
       
  4161     // if effective uid is not root. Perhaps, a more elegant way of doing
       
  4162     // this is to test CAP_SYS_NICE capability, but that will require libcap.so
       
  4163     if (geteuid() != 0) {
  4289     if (geteuid() != 0) {
  4164       if (!FLAG_IS_DEFAULT(ThreadPriorityPolicy)) {
  4290       if (!FLAG_IS_DEFAULT(ThreadPriorityPolicy)) {
  4165         warning("-XX:ThreadPriorityPolicy requires root privilege on Linux");
  4291         warning("-XX:ThreadPriorityPolicy=1 may require system level permission, " \
       
  4292                 "e.g., being the root user. If the necessary permission is not " \
       
  4293                 "held, changes to priority will be silently ignored.");
  4166       }
  4294       }
  4167       ThreadPriorityPolicy = 0;
       
  4168     }
  4295     }
  4169   }
  4296   }
  4170   if (UseCriticalJavaThreadPriority) {
  4297   if (UseCriticalJavaThreadPriority) {
  4171     os::java_to_os_priority[MaxPriority] = os::java_to_os_priority[CriticalPriority];
  4298     os::java_to_os_priority[MaxPriority] = os::java_to_os_priority[CriticalPriority];
  4172   }
  4299   }
  4382     ShouldNotReachHere();
  4509     ShouldNotReachHere();
  4383   }
  4510   }
  4384 
  4511 
  4385   // managed to send the signal and switch to SUSPEND_REQUEST, now wait for SUSPENDED
  4512   // managed to send the signal and switch to SUSPEND_REQUEST, now wait for SUSPENDED
  4386   while (true) {
  4513   while (true) {
  4387     if (sr_semaphore.timedwait(create_semaphore_timespec(0, 2 * NANOSECS_PER_MILLISEC))) {
  4514     if (sr_semaphore.timedwait(2)) {
  4388       break;
  4515       break;
  4389     } else {
  4516     } else {
  4390       // timeout
  4517       // timeout
  4391       os::SuspendResume::State cancelled = osthread->sr.cancel_suspend();
  4518       os::SuspendResume::State cancelled = osthread->sr.cancel_suspend();
  4392       if (cancelled == os::SuspendResume::SR_RUNNING) {
  4519       if (cancelled == os::SuspendResume::SR_RUNNING) {
  4416     return;
  4543     return;
  4417   }
  4544   }
  4418 
  4545 
  4419   while (true) {
  4546   while (true) {
  4420     if (sr_notify(osthread) == 0) {
  4547     if (sr_notify(osthread) == 0) {
  4421       if (sr_semaphore.timedwait(create_semaphore_timespec(0, 2 * NANOSECS_PER_MILLISEC))) {
  4548       if (sr_semaphore.timedwait(2)) {
  4422         if (osthread->sr.is_running()) {
  4549         if (osthread->sr.is_running()) {
  4423           return;
  4550           return;
  4424         }
  4551         }
  4425       }
  4552       }
  4426     } else {
  4553     } else {
  4475 // This boolean allows users to forward their own non-matching signals
  4602 // This boolean allows users to forward their own non-matching signals
  4476 // to JVM_handle_linux_signal, harmlessly.
  4603 // to JVM_handle_linux_signal, harmlessly.
  4477 bool os::Linux::signal_handlers_are_installed = false;
  4604 bool os::Linux::signal_handlers_are_installed = false;
  4478 
  4605 
  4479 // For signal-chaining
  4606 // For signal-chaining
  4480 struct sigaction sigact[NSIG];
       
  4481 uint64_t sigs = 0;
       
  4482 #if (64 < NSIG-1)
       
  4483 #error "Not all signals can be encoded in sigs. Adapt its type!"
       
  4484 #endif
       
  4485 bool os::Linux::libjsig_is_loaded = false;
  4607 bool os::Linux::libjsig_is_loaded = false;
  4486 typedef struct sigaction *(*get_signal_t)(int);
  4608 typedef struct sigaction *(*get_signal_t)(int);
  4487 get_signal_t os::Linux::get_signal_action = NULL;
  4609 get_signal_t os::Linux::get_signal_action = NULL;
  4488 
  4610 
  4489 struct sigaction* os::Linux::get_chained_signal_action(int sig) {
  4611 struct sigaction* os::Linux::get_chained_signal_action(int sig) {
  4493     // Retrieve the old signal handler from libjsig
  4615     // Retrieve the old signal handler from libjsig
  4494     actp = (*get_signal_action)(sig);
  4616     actp = (*get_signal_action)(sig);
  4495   }
  4617   }
  4496   if (actp == NULL) {
  4618   if (actp == NULL) {
  4497     // Retrieve the preinstalled signal handler from jvm
  4619     // Retrieve the preinstalled signal handler from jvm
  4498     actp = get_preinstalled_handler(sig);
  4620     actp = os::Posix::get_preinstalled_handler(sig);
  4499   }
  4621   }
  4500 
  4622 
  4501   return actp;
  4623   return actp;
  4502 }
  4624 }
  4503 
  4625 
  4557     }
  4679     }
  4558   }
  4680   }
  4559   return chained;
  4681   return chained;
  4560 }
  4682 }
  4561 
  4683 
  4562 struct sigaction* os::Linux::get_preinstalled_handler(int sig) {
       
  4563   if ((((uint64_t)1 << (sig-1)) & sigs) != 0) {
       
  4564     return &sigact[sig];
       
  4565   }
       
  4566   return NULL;
       
  4567 }
       
  4568 
       
  4569 void os::Linux::save_preinstalled_handler(int sig, struct sigaction& oldAct) {
       
  4570   assert(sig > 0 && sig < NSIG, "vm signal out of expected range");
       
  4571   sigact[sig] = oldAct;
       
  4572   sigs |= (uint64_t)1 << (sig-1);
       
  4573 }
       
  4574 
       
  4575 // for diagnostic
  4684 // for diagnostic
  4576 int sigflags[NSIG];
  4685 int sigflags[NSIG];
  4577 
  4686 
  4578 int os::Linux::get_our_sigflags(int sig) {
  4687 int os::Linux::get_our_sigflags(int sig) {
  4579   assert(sig > 0 && sig < NSIG, "vm signal out of expected range");
  4688   assert(sig > 0 && sig < NSIG, "vm signal out of expected range");
  4601     if (AllowUserSignalHandlers || !set_installed) {
  4710     if (AllowUserSignalHandlers || !set_installed) {
  4602       // Do not overwrite; user takes responsibility to forward to us.
  4711       // Do not overwrite; user takes responsibility to forward to us.
  4603       return;
  4712       return;
  4604     } else if (UseSignalChaining) {
  4713     } else if (UseSignalChaining) {
  4605       // save the old handler in jvm
  4714       // save the old handler in jvm
  4606       save_preinstalled_handler(sig, oldAct);
  4715       os::Posix::save_preinstalled_handler(sig, oldAct);
  4607       // libjsig also interposes the sigaction() call below and saves the
  4716       // libjsig also interposes the sigaction() call below and saves the
  4608       // old sigaction on its own.
  4717       // old sigaction on its own.
  4609     } else {
  4718     } else {
  4610       fatal("Encountered unexpected pre-existing sigaction handler "
  4719       fatal("Encountered unexpected pre-existing sigaction handler "
  4611             "%#lx for signal %d.", (long)oldhand, sig);
  4720             "%#lx for signal %d.", (long)oldhand, sig);
  4702 // For reference, please, see IEEE Std 1003.1-2004:
  4811 // For reference, please, see IEEE Std 1003.1-2004:
  4703 //   http://www.unix.org/single_unix_specification
  4812 //   http://www.unix.org/single_unix_specification
  4704 
  4813 
  4705 jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) {
  4814 jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) {
  4706   struct timespec tp;
  4815   struct timespec tp;
  4707   int rc = os::Linux::clock_gettime(clockid, &tp);
  4816   int rc = os::Posix::clock_gettime(clockid, &tp);
  4708   assert(rc == 0, "clock_gettime is expected to return 0 code");
  4817   assert(rc == 0, "clock_gettime is expected to return 0 code");
  4709 
  4818 
  4710   return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec;
  4819   return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec;
  4711 }
       
  4712 
       
  4713 void os::Linux::initialize_os_info() {
       
  4714   assert(_os_version == 0, "OS info already initialized");
       
  4715 
       
  4716   struct utsname _uname;
       
  4717 
       
  4718   uint32_t major;
       
  4719   uint32_t minor;
       
  4720   uint32_t fix;
       
  4721 
       
  4722   int rc;
       
  4723 
       
  4724   // Kernel version is unknown if
       
  4725   // verification below fails.
       
  4726   _os_version = 0x01000000;
       
  4727 
       
  4728   rc = uname(&_uname);
       
  4729   if (rc != -1) {
       
  4730 
       
  4731     rc = sscanf(_uname.release,"%d.%d.%d", &major, &minor, &fix);
       
  4732     if (rc == 3) {
       
  4733 
       
  4734       if (major < 256 && minor < 256 && fix < 256) {
       
  4735         // Kernel version format is as expected,
       
  4736         // set it overriding unknown state.
       
  4737         _os_version = (major << 16) |
       
  4738                       (minor << 8 ) |
       
  4739                       (fix   << 0 ) ;
       
  4740       }
       
  4741     }
       
  4742   }
       
  4743 }
       
  4744 
       
  4745 uint32_t os::Linux::os_version() {
       
  4746   assert(_os_version != 0, "not initialized");
       
  4747   return _os_version & 0x00FFFFFF;
       
  4748 }
       
  4749 
       
  4750 bool os::Linux::os_version_is_known() {
       
  4751   assert(_os_version != 0, "not initialized");
       
  4752   return _os_version & 0x01000000 ? false : true;
       
  4753 }
  4820 }
  4754 
  4821 
  4755 /////
  4822 /////
  4756 // glibc on the Linux platform uses an undocumented flag
  4823 // glibc on the Linux platform uses an undocumented flag
  4757 // to indicate that some special sort of signal
  4824 // to indicate that some special sort of signal
  4969   }
  5036   }
  4970   init_page_sizes((size_t) Linux::page_size());
  5037   init_page_sizes((size_t) Linux::page_size());
  4971 
  5038 
  4972   Linux::initialize_system_info();
  5039   Linux::initialize_system_info();
  4973 
  5040 
  4974   Linux::initialize_os_info();
  5041   os::Linux::CPUPerfTicks pticks;
       
  5042   bool res = os::Linux::get_tick_information(&pticks, -1);
       
  5043 
       
  5044   if (res && pticks.has_steal_ticks) {
       
  5045     has_initial_tick_info = true;
       
  5046     initial_total_ticks = pticks.total;
       
  5047     initial_steal_ticks = pticks.steal;
       
  5048   }
  4975 
  5049 
  4976   // _main_thread points to the thread that created/loaded the JVM.
  5050   // _main_thread points to the thread that created/loaded the JVM.
  4977   Linux::_main_thread = pthread_self();
  5051   Linux::_main_thread = pthread_self();
  4978 
       
  4979   Linux::clock_init();
       
  4980   initial_time_count = javaTimeNanos();
       
  4981 
  5052 
  4982   // retrieve entry point for pthread_setname_np
  5053   // retrieve entry point for pthread_setname_np
  4983   Linux::_pthread_setname_np =
  5054   Linux::_pthread_setname_np =
  4984     (int(*)(pthread_t, const char*))dlsym(RTLD_DEFAULT, "pthread_setname_np");
  5055     (int(*)(pthread_t, const char*))dlsym(RTLD_DEFAULT, "pthread_setname_np");
  4985 
  5056 
  4986   os::Posix::init();
  5057   os::Posix::init();
       
  5058 
       
  5059   initial_time_count = javaTimeNanos();
       
  5060 
       
  5061   // Always warn if no monotonic clock available
       
  5062   if (!os::Posix::supports_monotonic_clock()) {
       
  5063     warning("No monotonic clock was available - timed services may "    \
       
  5064             "be adversely affected if the time-of-day clock changes");
       
  5065   }
  4987 }
  5066 }
  4988 
  5067 
  4989 // To install functions for atexit system call
  5068 // To install functions for atexit system call
  4990 extern "C" {
  5069 extern "C" {
  4991   static void perfMemory_exit_helper() {
  5070   static void perfMemory_exit_helper() {
  4995 
  5074 
  4996 void os::pd_init_container_support() {
  5075 void os::pd_init_container_support() {
  4997   OSContainer::init();
  5076   OSContainer::init();
  4998 }
  5077 }
  4999 
  5078 
       
  5079 void os::Linux::numa_init() {
       
  5080 
       
  5081   // Java can be invoked as
       
  5082   // 1. Without numactl, in which case the heap will be allocated/configured on all nodes as
       
  5083   //    per the system policy.
       
  5084   // 2. With numactl --interleave:
       
  5085   //      Use the numa_get_interleave_mask(v2) API to get the nodes bitmask. The same

  5086   //      API returns an empty (reset) bitmask in the membind case.
       
  5087   //      Interleaving is only a hint and the kernel can fall back to other nodes if
       
  5088   //      no memory is available on the target nodes.
       
  5089   // 3. With numactl --membind:
       
  5090   //      Use the numa_get_membind(v2) API to get the nodes bitmask. The same API in the

  5091   //      interleave case returns a bitmask of all nodes.
       
  5092   // numa_all_nodes_ptr holds bitmask of all nodes.
       
  5093   // The numa_get_interleave_mask(v2) and numa_get_membind(v2) APIs return the correct
       
  5094   // bitmask when externally configured to run on all or fewer nodes.
       
  5095 
       
  5096   if (!Linux::libnuma_init()) {
       
  5097     UseNUMA = false;
       
  5098   } else {
       
  5099     if ((Linux::numa_max_node() < 1) || Linux::is_bound_to_single_node()) {
       
  5100       // If there's only one node (they start from 0) or if the process
       
  5101       // is bound explicitly to a single node using membind, disable NUMA.
       
  5102       UseNUMA = false;
       
  5103     } else {
       
  5104 
       
  5105       LogTarget(Info,os) log;
       
  5106       LogStream ls(log);
       
  5107 
       
  5108       Linux::set_configured_numa_policy(Linux::identify_numa_policy());
       
  5109 
       
  5110       struct bitmask* bmp = Linux::_numa_membind_bitmask;
       
  5111       const char* numa_mode = "membind";
       
  5112 
       
  5113       if (Linux::is_running_in_interleave_mode()) {
       
  5114         bmp = Linux::_numa_interleave_bitmask;
       
  5115         numa_mode = "interleave";
       
  5116       }
       
  5117 
       
  5118       ls.print("UseNUMA is enabled and invoked in '%s' mode."
       
  5119                " Heap will be configured using NUMA memory nodes:", numa_mode);
       
  5120 
       
  5121       for (int node = 0; node <= Linux::numa_max_node(); node++) {
       
  5122         if (Linux::_numa_bitmask_isbitset(bmp, node)) {
       
  5123           ls.print(" %d", node);
       
  5124         }
       
  5125       }
       
  5126     }
       
  5127   }
       
  5128 
       
  5129   if (UseParallelGC && UseNUMA && UseLargePages && !can_commit_large_page_memory()) {
       
  5130     // With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way
       
  5131     // we can make the adaptive lgrp chunk resizing work. If the user specified both
       
  5132     // UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn
       
  5133     // and disable adaptive resizing.
       
  5134     if (UseAdaptiveSizePolicy || UseAdaptiveNUMAChunkSizing) {
       
  5135       warning("UseNUMA is not fully compatible with SHM/HugeTLBFS large pages, "
       
  5136               "disabling adaptive resizing (-XX:-UseAdaptiveSizePolicy -XX:-UseAdaptiveNUMAChunkSizing)");
       
  5137       UseAdaptiveSizePolicy = false;
       
  5138       UseAdaptiveNUMAChunkSizing = false;
       
  5139     }
       
  5140   }
       
  5141 
       
  5142   if (!UseNUMA && ForceNUMA) {
       
  5143     UseNUMA = true;
       
  5144   }
       
  5145 }
       
  5146 
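numa_init above decides between interleave and membind mode by asking libnuma for both masks, as the comment at the top of the function describes. The sketch below performs the same detection against libnuma v2 directly (compile with -lnuma); HotSpot reaches these functions through dlsym'd pointers, and the weight-based heuristic here is a simplification of its policy.

#include <numa.h>
#include <stdio.h>

int main() {
  if (numa_available() == -1) {
    printf("libnuma not usable\n");
    return 1;
  }
  // Under "numactl --interleave=..." the interleave mask is populated; under
  // "numactl --membind=..." it is empty and the membind mask is narrowed instead.
  struct bitmask* interleave = numa_get_interleave_mask();
  struct bitmask* membind    = numa_get_membind();
  bool interleave_mode = numa_bitmask_weight(interleave) > 0;
  struct bitmask* bmp = interleave_mode ? interleave : membind;

  printf("mode: %s, nodes:", interleave_mode ? "interleave" : "membind");
  for (int node = 0; node <= numa_max_node(); node++) {
    if (numa_bitmask_isbitset(bmp, node)) {
      printf(" %d", node);
    }
  }
  printf("\n");
  numa_bitmask_free(interleave);
  numa_bitmask_free(membind);
  return 0;
}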
  5000 // this is called _after_ the global arguments have been parsed
  5147 // this is called _after_ the global arguments have been parsed
  5001 jint os::init_2(void) {
  5148 jint os::init_2(void) {
       
  5149 
       
  5150   // This could be set after os::Posix::init() but all platforms
       
  5151   // have to set it the same so we have to mirror Solaris.
       
  5152   DEBUG_ONLY(os::set_mutex_init_done();)
  5002 
  5153 
  5003   os::Posix::init_2();
  5154   os::Posix::init_2();
  5004 
  5155 
  5005   Linux::fast_thread_clock_init();
  5156   Linux::fast_thread_clock_init();
  5006 
  5157 
  5015   // Initialize data for jdk.internal.misc.Signal
  5166   // Initialize data for jdk.internal.misc.Signal
  5016   if (!ReduceSignalUsage) {
  5167   if (!ReduceSignalUsage) {
  5017     jdk_misc_signal_init();
  5168     jdk_misc_signal_init();
  5018   }
  5169   }
  5019 
  5170 
       
  5171   if (AdjustStackSizeForTLS) {
       
  5172     get_minstack_init();
       
  5173   }
       
  5174 
  5020   // Check and sets minimum stack sizes against command line options
  5175   // Check and sets minimum stack sizes against command line options
  5021   if (Posix::set_minimum_stack_sizes() == JNI_ERR) {
  5176   if (Posix::set_minimum_stack_sizes() == JNI_ERR) {
  5022     return JNI_ERR;
  5177     return JNI_ERR;
  5023   }
  5178   }
  5024 
  5179 
       
  5180 #if defined(IA32)
       
  5181   // Need to ensure we've determined the process's initial stack to
       
  5182   // perform the workaround
       
  5183   Linux::capture_initial_stack(JavaThread::stack_size_at_create());
       
  5184   workaround_expand_exec_shield_cs_limit();
       
  5185 #else
  5025   suppress_primordial_thread_resolution = Arguments::created_by_java_launcher();
  5186   suppress_primordial_thread_resolution = Arguments::created_by_java_launcher();
  5026   if (!suppress_primordial_thread_resolution) {
  5187   if (!suppress_primordial_thread_resolution) {
  5027     Linux::capture_initial_stack(JavaThread::stack_size_at_create());
  5188     Linux::capture_initial_stack(JavaThread::stack_size_at_create());
  5028   }
  5189   }
  5029 
       
  5030 #if defined(IA32)
       
  5031   workaround_expand_exec_shield_cs_limit();
       
  5032 #endif
  5190 #endif
  5033 
  5191 
  5034   Linux::libpthread_init();
  5192   Linux::libpthread_init();
  5035   Linux::sched_getcpu_init();
  5193   Linux::sched_getcpu_init();
  5036   log_info(os)("HotSpot is running with %s, %s",
  5194   log_info(os)("HotSpot is running with %s, %s",
  5037                Linux::glibc_version(), Linux::libpthread_version());
  5195                Linux::glibc_version(), Linux::libpthread_version());
  5038 
  5196 
  5039   if (UseNUMA) {
  5197   if (UseNUMA) {
  5040     if (!Linux::libnuma_init()) {
  5198     Linux::numa_init();
  5041       UseNUMA = false;
       
  5042     } else {
       
  5043       if ((Linux::numa_max_node() < 1) || Linux::isbound_to_single_node()) {
       
  5044         // If there's only one node (they start from 0) or if the process
       
  5045         // is bound explicitly to a single node using membind, disable NUMA.
       
  5046         UseNUMA = false;
       
  5047       }
       
  5048     }
       
  5049 
       
  5050     if (UseParallelGC && UseNUMA && UseLargePages && !can_commit_large_page_memory()) {
       
  5051       // With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way
       
  5052       // we can make the adaptive lgrp chunk resizing work. If the user specified both
       
  5053       // UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn
       
  5054       // and disable adaptive resizing.
       
  5055       if (UseAdaptiveSizePolicy || UseAdaptiveNUMAChunkSizing) {
       
  5056         warning("UseNUMA is not fully compatible with SHM/HugeTLBFS large pages, "
       
  5057                 "disabling adaptive resizing (-XX:-UseAdaptiveSizePolicy -XX:-UseAdaptiveNUMAChunkSizing)");
       
  5058         UseAdaptiveSizePolicy = false;
       
  5059         UseAdaptiveNUMAChunkSizing = false;
       
  5060       }
       
  5061     }
       
  5062 
       
  5063     if (!UseNUMA && ForceNUMA) {
       
  5064       UseNUMA = true;
       
  5065     }
       
  5066   }
  5199   }
  5067 
  5200 
  5068   if (MaxFDLimit) {
  5201   if (MaxFDLimit) {
  5069     // set the number of file descriptors to max. print out error
  5202     // set the number of file descriptors to max. print out error
  5070     // if getrlimit/setrlimit fails but continue regardless.
  5203     // if getrlimit/setrlimit fails but continue regardless.
  5079         log_info(os)("os::init_2 setrlimit failed: %s", os::strerror(errno));
  5212         log_info(os)("os::init_2 setrlimit failed: %s", os::strerror(errno));
  5080       }
  5213       }
  5081     }
  5214     }
  5082   }
  5215   }
  5083 
  5216 
  5084   // Initialize lock used to serialize thread creation (see os::create_thread)
       
  5085   Linux::set_createThread_lock(new Mutex(Mutex::leaf, "createThread_lock", false));
       
  5086 
       
  5087   // at-exit methods are called in the reverse order of their registration.
  5217   // at-exit methods are called in the reverse order of their registration.
  5088   // atexit functions are called on return from main or as a result of a
  5218   // atexit functions are called on return from main or as a result of a
  5089   // call to exit(3C). There can be only 32 of these functions registered
  5219   // call to exit(3C). There can be only 32 of these functions registered
  5090   // and atexit() does not set errno.
  5220   // and atexit() does not set errno.
  5091 
  5221 
  5104   }
  5234   }
  5105 
  5235 
  5106   // initialize thread priority policy
  5236   // initialize thread priority policy
  5107   prio_init();
  5237   prio_init();
  5108 
  5238 
  5109   if (!FLAG_IS_DEFAULT(AllocateHeapAt)) {
  5239   if (!FLAG_IS_DEFAULT(AllocateHeapAt) || !FLAG_IS_DEFAULT(AllocateOldGenAt)) {
  5110     set_coredump_filter(false /*largepages*/, true /*dax_shared*/);
  5240     set_coredump_filter(DAX_SHARED_BIT);
  5111   }
  5241   }
       
  5242 
       
  5243   if (DumpPrivateMappingsInCore) {
       
  5244     set_coredump_filter(FILE_BACKED_PVT_BIT);
       
  5245   }
       
  5246 
       
  5247   if (DumpSharedMappingsInCore) {
       
  5248     set_coredump_filter(FILE_BACKED_SHARED_BIT);
       
  5249   }
       
  5250 
  5112   return JNI_OK;
  5251   return JNI_OK;
  5113 }
  5252 }
  5114 
  5253 
  5115 // Mark the polling page as unreadable
  5254 // Mark the polling page as unreadable
  5116 void os::make_polling_page_unreadable(void) {
  5255 void os::make_polling_page_unreadable(void) {
  5271     buf[sizeof(buf) - 1] = '\0';
  5410     buf[sizeof(buf) - 1] = '\0';
  5272     const int rc = Linux::_pthread_setname_np(pthread_self(), buf);
  5411     const int rc = Linux::_pthread_setname_np(pthread_self(), buf);
  5273     // ERANGE should not happen; all other errors should just be ignored.
  5412     // ERANGE should not happen; all other errors should just be ignored.
  5274     assert(rc != ERANGE, "pthread_setname_np failed");
  5413     assert(rc != ERANGE, "pthread_setname_np failed");
  5275   }
  5414   }
  5276 }
       
  5277 
       
  5278 bool os::distribute_processes(uint length, uint* distribution) {
       
  5279   // Not yet implemented.
       
  5280   return false;
       
  5281 }
  5415 }
  5282 
  5416 
  5283 bool os::bind_to_processor(uint processor_id) {
  5417 bool os::bind_to_processor(uint processor_id) {
  5284   // Not yet implemented.
  5418   // Not yet implemented.
  5285   return false;
  5419   return false;
  5715 
  5849 
  5716 // Run the specified command in a separate process. Return its exit value,
  5850 // Run the specified command in a separate process. Return its exit value,
  5717 // or -1 on failure (e.g. can't fork a new process).
  5851 // or -1 on failure (e.g. can't fork a new process).
  5718 // Unlike system(), this function can be called from a signal handler. It
  5852 // Unlike system(), this function can be called from a signal handler. It
  5719 // doesn't block SIGINT et al.
  5853 // doesn't block SIGINT et al.
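// A hypothetical call site (e.g. running an expanded -XX:OnError command) checks
// only the sign of the result, since -1 is the sole failure indication:
//
//   if (os::fork_and_exec(cmd, false) < 0) {
//     // fork failed; the command was never run
//   }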
  5720 int os::fork_and_exec(char* cmd) {
  5854 int os::fork_and_exec(char* cmd, bool use_vfork_if_available) {
  5721   const char * argv[4] = {"sh", "-c", cmd, NULL};
  5855   const char * argv[4] = {"sh", "-c", cmd, NULL};
  5722 
  5856 
  5723   pid_t pid = fork();
  5857   pid_t pid;
       
  5858 
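  // vfork() suspends the caller and lets the child temporarily borrow the parent's
  // address space until it calls execve() or _exit(). Unlike fork() it does not
  // duplicate the page tables, which makes it preferable when forking from a crash
  // handler in a large or memory-constrained process; the child must then only
  // exec or exit, as vfork() requires.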
       
  5859   if (use_vfork_if_available) {
       
  5860     pid = vfork();
       
  5861   } else {
       
  5862     pid = fork();
       
  5863   }
  5724 
  5864 
  5725   if (pid < 0) {
  5865   if (pid < 0) {
  5726     // fork failed
  5866     // fork failed
  5727     return -1;
  5867     return -1;
  5728 
  5868 
  5973 #endif
  6113 #endif
  5974 
  6114 
  5975 static inline struct timespec get_mtime(const char* filename) {
  6115 static inline struct timespec get_mtime(const char* filename) {
  5976   struct stat st;
  6116   struct stat st;
  5977   int ret = os::stat(filename, &st);
  6117   int ret = os::stat(filename, &st);
  5978   assert(ret == 0, "failed to stat() file '%s': %s", filename, strerror(errno));
  6118   assert(ret == 0, "failed to stat() file '%s': %s", filename, os::strerror(errno));
  5979   return st.st_mtim;
  6119   return st.st_mtim;
  5980 }
  6120 }
  5981 
  6121 
  5982 int os::compare_file_modified_times(const char* file1, const char* file2) {
  6122 int os::compare_file_modified_times(const char* file1, const char* file2) {
  5983   struct timespec filetime1 = get_mtime(file1);
  6123   struct timespec filetime1 = get_mtime(file1);
  5987     return filetime1.tv_nsec - filetime2.tv_nsec;
  6127     return filetime1.tv_nsec - filetime2.tv_nsec;
  5988   }
  6128   }
  5989   return diff;
  6129   return diff;
  5990 }
  6130 }
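// Hypothetical usage: a positive result means file1 is the newer file; seconds are
// compared first and nanoseconds (st_mtim.tv_nsec) break ties.
//
//   if (os::compare_file_modified_times("foo.jsa", "foo.jar") > 0) {
//     // foo.jsa was modified more recently than foo.jar
//   }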
  5991 
  6131 
       
  6132 bool os::supports_map_sync() {
       
  6133   return true;
       
  6134 }
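// Returning true advertises that mmap() on this platform accepts
// MAP_SYNC | MAP_SHARED_VALIDATE, so callers may map files on DAX-capable
// (persistent-memory) filesystems with synchronous faults, e.g. (sketch; fd is
// assumed to be open on such a filesystem):
//
//   void* p = ::mmap(NULL, len, PROT_READ | PROT_WRITE,
//                    MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);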
       
  6135 
  5992 /////////////// Unit tests ///////////////
  6136 /////////////// Unit tests ///////////////
  5993 
  6137 
  5994 #ifndef PRODUCT
  6138 #ifndef PRODUCT
  5995 
       
  5996 #define test_log(...)              \
       
  5997   do {                             \
       
  5998     if (VerboseInternalVMTests) {  \
       
  5999       tty->print_cr(__VA_ARGS__);  \
       
  6000       tty->flush();                \
       
  6001     }                              \
       
  6002   } while (false)
       
  6003 
  6139 
  6004 class TestReserveMemorySpecial : AllStatic {
  6140 class TestReserveMemorySpecial : AllStatic {
  6005  public:
  6141  public:
  6006   static void small_page_write(void* addr, size_t size) {
  6142   static void small_page_write(void* addr, size_t size) {
  6007     size_t page_size = os::vm_page_size();
  6143     size_t page_size = os::vm_page_size();
  6014 
  6150 
  6015   static void test_reserve_memory_special_huge_tlbfs_only(size_t size) {
  6151   static void test_reserve_memory_special_huge_tlbfs_only(size_t size) {
  6016     if (!UseHugeTLBFS) {
  6152     if (!UseHugeTLBFS) {
  6017       return;
  6153       return;
  6018     }
  6154     }
  6019 
       
  6020     test_log("test_reserve_memory_special_huge_tlbfs_only(" SIZE_FORMAT ")", size);
       
  6021 
  6155 
  6022     char* addr = os::Linux::reserve_memory_special_huge_tlbfs_only(size, NULL, false);
  6156     char* addr = os::Linux::reserve_memory_special_huge_tlbfs_only(size, NULL, false);
  6023 
  6157 
  6024     if (addr != NULL) {
  6158     if (addr != NULL) {
  6025       small_page_write(addr, size);
  6159       small_page_write(addr, size);
  6075     // mapping will serve as a value for a "good" req_addr (case 2). The second
  6209     // mapping will serve as a value for a "good" req_addr (case 2). The second
  6076     // mapping, still intact, as "bad" req_addr (case 3).
  6210     // mapping, still intact, as "bad" req_addr (case 3).
  6077     ::munmap(mapping1, mapping_size);
  6211     ::munmap(mapping1, mapping_size);
  6078 
  6212 
  6079     // Case 1
  6213     // Case 1
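    // req_addr == NULL: placement is up to the implementation, but the result must
    // honor the requested alignment.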
  6080     test_log("%s, req_addr NULL:", __FUNCTION__);
       
  6081     test_log("size            align           result");
       
  6082 
       
  6083     for (int i = 0; i < num_sizes; i++) {
  6214     for (int i = 0; i < num_sizes; i++) {
  6084       const size_t size = sizes[i];
  6215       const size_t size = sizes[i];
  6085       for (size_t alignment = ag; is_aligned(size, alignment); alignment *= 2) {
  6216       for (size_t alignment = ag; is_aligned(size, alignment); alignment *= 2) {
  6086         char* p = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, NULL, false);
  6217         char* p = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, NULL, false);
  6087         test_log(SIZE_FORMAT_HEX " " SIZE_FORMAT_HEX " ->  " PTR_FORMAT " %s",
       
  6088                  size, alignment, p2i(p), (p != NULL ? "" : "(failed)"));
       
  6089         if (p != NULL) {
  6218         if (p != NULL) {
  6090           assert(is_aligned(p, alignment), "must be");
  6219           assert(is_aligned(p, alignment), "must be");
  6091           small_page_write(p, size);
  6220           small_page_write(p, size);
  6092           os::Linux::release_memory_special_huge_tlbfs(p, size);
  6221           os::Linux::release_memory_special_huge_tlbfs(p, size);
  6093         }
  6222         }
  6094       }
  6223       }
  6095     }
  6224     }
  6096 
  6225 
  6097     // Case 2
  6226     // Case 2
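    // req_addr points into the range unmapped above (a "good" address): if the
    // reservation succeeds it must be at exactly req_addr.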
  6098     test_log("%s, req_addr non-NULL:", __FUNCTION__);
       
  6099     test_log("size            align           req_addr         result");
       
  6100 
       
  6101     for (int i = 0; i < num_sizes; i++) {
  6227     for (int i = 0; i < num_sizes; i++) {
  6102       const size_t size = sizes[i];
  6228       const size_t size = sizes[i];
  6103       for (size_t alignment = ag; is_aligned(size, alignment); alignment *= 2) {
  6229       for (size_t alignment = ag; is_aligned(size, alignment); alignment *= 2) {
  6104         char* const req_addr = align_up(mapping1, alignment);
  6230         char* const req_addr = align_up(mapping1, alignment);
  6105         char* p = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, req_addr, false);
  6231         char* p = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, req_addr, false);
  6106         test_log(SIZE_FORMAT_HEX " " SIZE_FORMAT_HEX " " PTR_FORMAT " ->  " PTR_FORMAT " %s",
       
  6107                  size, alignment, p2i(req_addr), p2i(p),
       
  6108                  ((p != NULL ? (p == req_addr ? "(exact match)" : "") : "(failed)")));
       
  6109         if (p != NULL) {
  6232         if (p != NULL) {
  6110           assert(p == req_addr, "must be");
  6233           assert(p == req_addr, "must be");
  6111           small_page_write(p, size);
  6234           small_page_write(p, size);
  6112           os::Linux::release_memory_special_huge_tlbfs(p, size);
  6235           os::Linux::release_memory_special_huge_tlbfs(p, size);
  6113         }
  6236         }
  6114       }
  6237       }
  6115     }
  6238     }
  6116 
  6239 
  6117     // Case 3
  6240     // Case 3
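    // req_addr points into mapping2, which is still mapped (a "bad" address): the
    // reservation must fail and return NULL.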
  6118     test_log("%s, req_addr non-NULL with preexisting mapping:", __FUNCTION__);
       
  6119     test_log("size            align           req_addr         result");
       
  6120 
       
  6121     for (int i = 0; i < num_sizes; i++) {
  6241     for (int i = 0; i < num_sizes; i++) {
  6122       const size_t size = sizes[i];
  6242       const size_t size = sizes[i];
  6123       for (size_t alignment = ag; is_aligned(size, alignment); alignment *= 2) {
  6243       for (size_t alignment = ag; is_aligned(size, alignment); alignment *= 2) {
  6124         char* const req_addr = align_up(mapping2, alignment);
  6244         char* const req_addr = align_up(mapping2, alignment);
  6125         char* p = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, req_addr, false);
  6245         char* p = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, req_addr, false);
  6126         test_log(SIZE_FORMAT_HEX " " SIZE_FORMAT_HEX " " PTR_FORMAT " ->  " PTR_FORMAT " %s",
       
  6127                  size, alignment, p2i(req_addr), p2i(p), ((p != NULL ? "" : "(failed)")));
       
  6128         // Since the area around req_addr already contains existing mappings, the API should
  6246         // Since the area around req_addr already contains existing mappings, the API should
  6129         // always return NULL (per its contract, it cannot return a different address).
  6247         // always return NULL (per its contract, it cannot return a different address).
  6130         assert(p == NULL, "must be");
  6248         assert(p == NULL, "must be");
  6131       }
  6249       }
  6132     }
  6250     }
  6147   static void test_reserve_memory_special_shm(size_t size, size_t alignment) {
  6265   static void test_reserve_memory_special_shm(size_t size, size_t alignment) {
  6148     if (!UseSHM) {
  6266     if (!UseSHM) {
  6149       return;
  6267       return;
  6150     }
  6268     }
  6151 
  6269 
  6152     test_log("test_reserve_memory_special_shm(" SIZE_FORMAT ", " SIZE_FORMAT ")", size, alignment);
       
  6153 
       
  6154     char* addr = os::Linux::reserve_memory_special_shm(size, alignment, NULL, false);
  6270     char* addr = os::Linux::reserve_memory_special_shm(size, alignment, NULL, false);
  6155 
  6271 
  6156     if (addr != NULL) {
  6272     if (addr != NULL) {
  6157       assert(is_aligned(addr, alignment), "Check");
  6273       assert(is_aligned(addr, alignment), "Check");
  6158       assert(is_aligned(addr, os::large_page_size()), "Check");
  6274       assert(is_aligned(addr, os::large_page_size()), "Check");