src/hotspot/os_cpu/linux_x86/gc/z/zBackingFile_linux_x86.cpp
changeset 50525 767cdb97f103
child 50875 2217b2fc29ea
equal deleted inserted replaced
50524:04f4e983c2f7 50525:767cdb97f103
       
     1 /*
       
     2  * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.
       
     8  *
       
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    12  * version 2 for more details (a copy is included in the LICENSE file that
       
    13  * accompanied this code).
       
    14  *
       
    15  * You should have received a copy of the GNU General Public License version
       
    16  * 2 along with this work; if not, write to the Free Software Foundation,
       
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    18  *
       
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    20  * or visit www.oracle.com if you need additional information or have any
       
    21  * questions.
       
    22  */
       
    23 
       
    24 #include "precompiled.hpp"
       
    25 #include "gc/z/zArray.inline.hpp"
       
    26 #include "gc/z/zBackingFile_linux_x86.hpp"
       
    27 #include "gc/z/zBackingPath_linux_x86.hpp"
       
    28 #include "gc/z/zErrno.hpp"
       
    29 #include "gc/z/zLargePages.inline.hpp"
       
    30 #include "logging/log.hpp"
       
    31 #include "runtime/init.hpp"
       
    32 #include "runtime/os.hpp"
       
    33 #include "utilities/align.hpp"
       
    34 #include "utilities/debug.hpp"
       
    35 
       
    36 #include <fcntl.h>
       
    37 #include <sys/mman.h>
       
    38 #include <sys/stat.h>
       
    39 #include <sys/statfs.h>
       
    40 #include <sys/types.h>
       
    41 #include <unistd.h>
       
    42 
       
     43 // Filesystem names, used when locating a backing path and in log messages
       
     44 #define ZFILESYSTEM_TMPFS                "tmpfs"
       
     45 #define ZFILESYSTEM_HUGETLBFS            "hugetlbfs"
       
     46 
       
     47 // Sysfs file whose readability is used to detect kernel support for transparent huge pages on tmpfs
       
     48 #define ZFILENAME_SHMEM_ENABLED          "/sys/kernel/mm/transparent_hugepage/shmem_enabled"
       
     49 
       
     50 // Default mount points, passed along with the filesystem name when searching for a backing path
       
     51 #define ZMOUNTPOINT_TMPFS                "/dev/shm"
       
     52 #define ZMOUNTPOINT_HUGETLBFS            "/hugepages"
       
     53 
       
     54 // Base name of the Java heap backing file
       
     55 #define ZFILENAME_HEAP                   "java_heap"
       
     56 
       
     57 // Support for building on older Linux systems, where the system headers
       // may not provide these definitions. Each fallback is only used when the
       // corresponding macro is not already defined.
     58 #ifndef __NR_memfd_create
       
     59 #define __NR_memfd_create                319 // NOTE(review): presumably the x86_64 syscall number - confirm if other targets are added
       
     60 #endif
       
     61 #ifndef MFD_CLOEXEC
       
     62 #define MFD_CLOEXEC                      0x0001U
       
     63 #endif
       
     64 #ifndef MFD_HUGETLB
       
     65 #define MFD_HUGETLB                      0x0004U
       
     66 #endif
       
     67 #ifndef O_CLOEXEC
       
     68 #define O_CLOEXEC                        02000000
       
     69 #endif
       
     70 #ifndef O_TMPFILE
       
     71 #define O_TMPFILE                        (020000000 | O_DIRECTORY)
       
     72 #endif
       
     73 
       
     74 // Filesystem types, see statfs(2). Compared against statfs::f_type of the backing file.
       
     75 #ifndef TMPFS_MAGIC
       
     76 #define TMPFS_MAGIC                      0x01021994
       
     77 #endif
       
     78 #ifndef HUGETLBFS_MAGIC
       
     79 #define HUGETLBFS_MAGIC                  0x958458f6
       
     80 #endif
       
    81 
       
    82 static int z_memfd_create(const char *name, unsigned int flags) {
       
    83   return syscall(__NR_memfd_create, name, flags);
       
    84 }
       
    85 
       
    86 ZBackingFile::ZBackingFile() :
       
    87     _fd(-1),
       
    88     _filesystem(0),
       
    89     _initialized(false) {
       
    90 
       
    91   // Create backing file
       
    92   _fd = create_fd(ZFILENAME_HEAP);
       
    93   if (_fd == -1) {
       
    94     return;
       
    95   }
       
    96 
       
    97   // Get filesystem type
       
    98   struct statfs statfs_buf;
       
    99   if (fstatfs(_fd, &statfs_buf) == -1) {
       
   100     ZErrno err;
       
   101     log_error(gc, init)("Failed to determine filesystem type for backing file (%s)", err.to_string());
       
   102     return;
       
   103   }
       
   104   _filesystem = statfs_buf.f_type;
       
   105 
       
   106   // Make sure we're on a supported filesystem
       
   107   if (!is_tmpfs() && !is_hugetlbfs()) {
       
   108     log_error(gc, init)("Backing file must be located on a %s or a %s filesystem", ZFILESYSTEM_TMPFS, ZFILESYSTEM_HUGETLBFS);
       
   109     return;
       
   110   }
       
   111 
       
   112   // Make sure the filesystem type matches requested large page type
       
   113   if (ZLargePages::is_transparent() && !is_tmpfs()) {
       
   114     log_error(gc, init)("-XX:+UseTransparentHugePages can only be enable when using a %s filesystem", ZFILESYSTEM_TMPFS);
       
   115     return;
       
   116   }
       
   117 
       
   118   if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) {
       
   119     log_error(gc, init)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel", ZFILESYSTEM_TMPFS);
       
   120     return;
       
   121   }
       
   122 
       
   123   if (ZLargePages::is_explicit() && !is_hugetlbfs()) {
       
   124     log_error(gc, init)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled when using a %s filesystem", ZFILESYSTEM_HUGETLBFS);
       
   125     return;
       
   126   }
       
   127 
       
   128   if (!ZLargePages::is_explicit() && is_hugetlbfs()) {
       
   129     log_error(gc, init)("-XX:+UseLargePages must be enabled when using a %s filesystem", ZFILESYSTEM_HUGETLBFS);
       
   130     return;
       
   131   }
       
   132 
       
   133   // Successfully initialized
       
   134   _initialized = true;
       
   135 }
       
   136 
       
   137 int ZBackingFile::create_mem_fd(const char* name) const {
       
   138   // Create file name
       
   139   char filename[PATH_MAX];
       
   140   snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? ".hugetlb" : "");
       
   141 
       
   142   // Create file
       
   143   const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0;
       
   144   const int fd = z_memfd_create(filename, MFD_CLOEXEC | extra_flags);
       
   145   if (fd == -1) {
       
   146     ZErrno err;
       
   147     log_debug(gc, init)("Failed to create memfd file (%s)",
       
   148                         ((UseLargePages && err == EINVAL) ? "Hugepages not supported" : err.to_string()));
       
   149     return -1;
       
   150   }
       
   151 
       
   152   log_debug(gc, init)("Heap backed by file /memfd:%s", filename);
       
   153 
       
   154   return fd;
       
   155 }
       
   156 
       
   157 int ZBackingFile::create_file_fd(const char* name) const {
       
   158   const char* const filesystem = ZLargePages::is_explicit() ? ZFILESYSTEM_HUGETLBFS : ZFILESYSTEM_TMPFS;
       
   159   const char* const mountpoint = ZLargePages::is_explicit() ? ZMOUNTPOINT_HUGETLBFS : ZMOUNTPOINT_TMPFS;
       
   160 
       
   161   // Find mountpoint
       
   162   ZBackingPath path(filesystem, mountpoint);
       
   163   if (path.get() == NULL) {
       
   164     log_error(gc, init)("Use -XX:ZPath to specify the path to a %s filesystem", filesystem);
       
   165     return -1;
       
   166   }
       
   167 
       
   168   // Try to create an anonymous file using the O_TMPFILE flag. Note that this
       
   169   // flag requires kernel >= 3.11. If this fails we fall back to open/unlink.
       
   170   const int fd_anon = open(path.get(), O_TMPFILE|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
       
   171   if (fd_anon == -1) {
       
   172     ZErrno err;
       
   173     log_debug(gc, init)("Failed to create anonymouns file in %s (%s)", path.get(),
       
   174                         (err == EINVAL ? "Not supported" : err.to_string()));
       
   175   } else {
       
   176     // Get inode number for anonymous file
       
   177     struct stat stat_buf;
       
   178     if (fstat(fd_anon, &stat_buf) == -1) {
       
   179       ZErrno err;
       
   180       log_error(gc, init)("Failed to determine inode number for anonymous file (%s)", err.to_string());
       
   181       return -1;
       
   182     }
       
   183 
       
   184     log_debug(gc, init)("Heap backed by file %s/#" UINT64_FORMAT, path.get(), (uint64_t)stat_buf.st_ino);
       
   185 
       
   186     return fd_anon;
       
   187   }
       
   188 
       
   189   log_debug(gc, init)("Falling back to open/unlink");
       
   190 
       
   191   // Create file name
       
   192   char filename[PATH_MAX];
       
   193   snprintf(filename, sizeof(filename), "%s/%s.%d", path.get(), name, os::current_process_id());
       
   194 
       
   195   // Create file
       
   196   const int fd = open(filename, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
       
   197   if (fd == -1) {
       
   198     ZErrno err;
       
   199     log_error(gc, init)("Failed to create file %s (%s)", filename, err.to_string());
       
   200     return -1;
       
   201   }
       
   202 
       
   203   // Unlink file
       
   204   if (unlink(filename) == -1) {
       
   205     ZErrno err;
       
   206     log_error(gc, init)("Failed to unlink file %s (%s)", filename, err.to_string());
       
   207     return -1;
       
   208   }
       
   209 
       
   210   log_debug(gc, init)("Heap backed by file %s", filename);
       
   211 
       
   212   return fd;
       
   213 }
       
   214 
       
   215 int ZBackingFile::create_fd(const char* name) const {
       
   216   if (ZPath == NULL) {
       
   217     // If the path is not explicitly specified, then we first try to create a memfd file
       
   218     // instead of looking for a tmpfd/hugetlbfs mount point. Note that memfd_create() might
       
   219     // not be supported at all (requires kernel >= 3.17), or it might not support large
       
   220     // pages (requires kernel >= 4.14). If memfd_create() fails, then we try to create a
       
   221     // file on an accessible tmpfs or hugetlbfs mount point.
       
   222     const int fd = create_mem_fd(name);
       
   223     if (fd != -1) {
       
   224       return fd;
       
   225     }
       
   226 
       
   227     log_debug(gc, init)("Falling back to searching for an accessible moint point");
       
   228   }
       
   229 
       
   230   return create_file_fd(name);
       
   231 }
       
   232 
       
   233 bool ZBackingFile::is_initialized() const {
       
   234   return _initialized;
       
   235 }
       
   236 
       
   237 int ZBackingFile::fd() const {
       
   238   return _fd;
       
   239 }
       
   240 
       
   241 bool ZBackingFile::is_tmpfs() const {
       
   242   return _filesystem == TMPFS_MAGIC;
       
   243 }
       
   244 
       
   245 bool ZBackingFile::is_hugetlbfs() const {
       
   246   return _filesystem == HUGETLBFS_MAGIC;
       
   247 }
       
   248 
       
   249 bool ZBackingFile::tmpfs_supports_transparent_huge_pages() const {
       
   250   // If the shmem_enabled file exists and is readable then we
       
   251   // know the kernel supports transparent huge pages for tmpfs.
       
   252   return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
       
   253 }
       
   254 
       
   255 bool ZBackingFile::try_split_and_expand_tmpfs(size_t offset, size_t length, size_t alignment) const {
       
   256   // Try first smaller part.
       
   257   const size_t offset0 = offset;
       
   258   const size_t length0 = align_up(length / 2, alignment);
       
   259   if (!try_expand_tmpfs(offset0, length0, alignment)) {
       
   260     return false;
       
   261   }
       
   262 
       
   263   // Try second smaller part.
       
   264   const size_t offset1 = offset0 + length0;
       
   265   const size_t length1 = length - length0;
       
   266   if (!try_expand_tmpfs(offset1, length1, alignment)) {
       
   267     return false;
       
   268   }
       
   269 
       
   270   return true;
       
   271 }
       
   272 
       
   273 bool ZBackingFile::try_expand_tmpfs(size_t offset, size_t length, size_t alignment) const {
       
   274   assert(length > 0, "Invalid length");
       
   275   assert(is_aligned(length, alignment), "Invalid length");
       
   276 
       
   277   ZErrno err = posix_fallocate(_fd, offset, length);
       
   278 
       
   279   if (err == EINTR && length > alignment) {
       
   280     // Calling posix_fallocate() with a large length can take a long
       
   281     // time to complete. When running profilers, such as VTune, this
       
   282     // syscall will be constantly interrupted by signals. Expanding
       
   283     // the file in smaller steps avoids this problem.
       
   284     return try_split_and_expand_tmpfs(offset, length, alignment);
       
   285   }
       
   286 
       
   287   if (err) {
       
   288     log_error(gc)("Failed to allocate backing file (%s)", err.to_string());
       
   289     return false;
       
   290   }
       
   291 
       
   292   return true;
       
   293 }
       
   294 
       
   295 bool ZBackingFile::expand_tmpfs(size_t offset, size_t length) const {
       
   296   assert(is_tmpfs(), "Wrong filesystem");
       
   297   return try_expand_tmpfs(offset, length, os::vm_page_size());
       
   298 }
       
   299 
       
   300 bool ZBackingFile::expand_hugetlbfs(size_t offset, size_t length) const {
       
   301   assert(is_hugetlbfs(), "Wrong filesystem");
       
   302 
       
   303   // Prior to kernel 4.3, hugetlbfs did not support posix_fallocate().
       
   304   // Instead of posix_fallocate() we can use a well-known workaround,
       
   305   // which involves truncating the file to requested size and then try
       
   306   // to map it to verify that there are enough huge pages available to
       
   307   // back it.
       
   308   while (ftruncate(_fd, offset + length) == -1) {
       
   309     ZErrno err;
       
   310     if (err != EINTR) {
       
   311       log_error(gc)("Failed to truncate backing file (%s)", err.to_string());
       
   312       return false;
       
   313     }
       
   314   }
       
   315 
       
   316   // If we fail mapping during initialization, i.e. when we are pre-mapping
       
   317   // the heap, then we wait and retry a few times before giving up. Otherwise
       
   318   // there is a risk that running JVMs back-to-back will fail, since there
       
   319   // is a delay between process termination and the huge pages owned by that
       
   320   // process being returned to the huge page pool and made available for new
       
   321   // allocations.
       
   322   void* addr = MAP_FAILED;
       
   323   const int max_attempts = 3;
       
   324   for (int attempt = 1; attempt <= max_attempts; attempt++) {
       
   325     addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
       
   326     if (addr != MAP_FAILED || is_init_completed()) {
       
   327       // Mapping was successful or initialization phase has completed
       
   328       break;
       
   329     }
       
   330 
       
   331     ZErrno err;
       
   332     log_debug(gc)("Failed to map backing file (%s), attempt %d of %d",
       
   333                   err.to_string(), attempt, max_attempts);
       
   334 
       
   335     // Wait and retry in one second, in the hope that
       
   336     // huge pages will be available by then.
       
   337     sleep(1);
       
   338   }
       
   339 
       
   340   if (addr == MAP_FAILED) {
       
   341     // Not enough huge pages left
       
   342     ZErrno err;
       
   343     log_error(gc)("Failed to map backing file (%s)", err.to_string());
       
   344     return false;
       
   345   }
       
   346 
       
   347   // Successful mapping, unmap again. From now on the pages we mapped
       
   348   // will be reserved for this file.
       
   349   if (munmap(addr, length) == -1) {
       
   350     ZErrno err;
       
   351     log_error(gc)("Failed to unmap backing file (%s)", err.to_string());
       
   352     return false;
       
   353   }
       
   354 
       
   355   return true;
       
   356 }
       
   357 
       
   358 bool ZBackingFile::expand(size_t offset, size_t length) const {
       
   359   return is_hugetlbfs() ? expand_hugetlbfs(offset, length) : expand_tmpfs(offset, length);
       
   360 }