|
1 /* |
|
2 * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. |
|
8 * |
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 * version 2 for more details (a copy is included in the LICENSE file that |
|
13 * accompanied this code). |
|
14 * |
|
15 * You should have received a copy of the GNU General Public License version |
|
16 * 2 along with this work; if not, write to the Free Software Foundation, |
|
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 * |
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
20 * or visit www.oracle.com if you need additional information or have any |
|
21 * questions. |
|
22 */ |
|
23 |
|
24 #include "precompiled.hpp" |
|
25 #include "gc/z/zArray.inline.hpp" |
|
26 #include "gc/z/zBackingFile_linux_x86.hpp" |
|
27 #include "gc/z/zBackingPath_linux_x86.hpp" |
|
28 #include "gc/z/zErrno.hpp" |
|
29 #include "gc/z/zLargePages.inline.hpp" |
|
30 #include "logging/log.hpp" |
|
31 #include "runtime/init.hpp" |
|
32 #include "runtime/os.hpp" |
|
33 #include "utilities/align.hpp" |
|
34 #include "utilities/debug.hpp" |
|
35 |
|
36 #include <fcntl.h> |
|
37 #include <sys/mman.h> |
|
38 #include <sys/stat.h> |
|
39 #include <sys/statfs.h> |
|
40 #include <sys/types.h> |
|
41 #include <unistd.h> |
|
42 |
|
// Names of the filesystems a backing file may live on
#define ZFILESYSTEM_TMPFS                "tmpfs"
#define ZFILESYSTEM_HUGETLBFS            "hugetlbfs"

// Sysfs file whose readability is used to detect transparent
// huge page support on tmpfs
#define ZFILENAME_SHMEM_ENABLED          "/sys/kernel/mm/transparent_hugepage/shmem_enabled"

// Default mount point for each filesystem
#define ZMOUNTPOINT_TMPFS                "/dev/shm"
#define ZMOUNTPOINT_HUGETLBFS            "/hugepages"

// Name used for the file backing the Java heap
#define ZFILENAME_HEAP                   "java_heap"

// Fallback definitions so that the file still builds on older
// Linux systems whose headers lack these constants
#if !defined(__NR_memfd_create)
#define __NR_memfd_create                319
#endif

#if !defined(MFD_CLOEXEC)
#define MFD_CLOEXEC                      0x0001U
#endif

#if !defined(MFD_HUGETLB)
#define MFD_HUGETLB                      0x0004U
#endif

#if !defined(O_CLOEXEC)
#define O_CLOEXEC                        02000000
#endif

#if !defined(O_TMPFILE)
#define O_TMPFILE                        (020000000 | O_DIRECTORY)
#endif

// Filesystem type magic numbers, see statfs(2)
#if !defined(TMPFS_MAGIC)
#define TMPFS_MAGIC                      0x01021994
#endif

#if !defined(HUGETLBFS_MAGIC)
#define HUGETLBFS_MAGIC                  0x958458f6
#endif
|
81 |
|
// Invokes the memfd_create system call directly through syscall(), passing
// name and flags through unchanged, and returns the result as an int.
static int z_memfd_create(const char *name, unsigned int flags) {
  const long result = syscall(__NR_memfd_create, name, flags);
  return static_cast<int>(result);
}
|
85 |
|
86 ZBackingFile::ZBackingFile() : |
|
87 _fd(-1), |
|
88 _filesystem(0), |
|
89 _initialized(false) { |
|
90 |
|
91 // Create backing file |
|
92 _fd = create_fd(ZFILENAME_HEAP); |
|
93 if (_fd == -1) { |
|
94 return; |
|
95 } |
|
96 |
|
97 // Get filesystem type |
|
98 struct statfs statfs_buf; |
|
99 if (fstatfs(_fd, &statfs_buf) == -1) { |
|
100 ZErrno err; |
|
101 log_error(gc, init)("Failed to determine filesystem type for backing file (%s)", err.to_string()); |
|
102 return; |
|
103 } |
|
104 _filesystem = statfs_buf.f_type; |
|
105 |
|
106 // Make sure we're on a supported filesystem |
|
107 if (!is_tmpfs() && !is_hugetlbfs()) { |
|
108 log_error(gc, init)("Backing file must be located on a %s or a %s filesystem", ZFILESYSTEM_TMPFS, ZFILESYSTEM_HUGETLBFS); |
|
109 return; |
|
110 } |
|
111 |
|
112 // Make sure the filesystem type matches requested large page type |
|
113 if (ZLargePages::is_transparent() && !is_tmpfs()) { |
|
114 log_error(gc, init)("-XX:+UseTransparentHugePages can only be enable when using a %s filesystem", ZFILESYSTEM_TMPFS); |
|
115 return; |
|
116 } |
|
117 |
|
118 if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) { |
|
119 log_error(gc, init)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel", ZFILESYSTEM_TMPFS); |
|
120 return; |
|
121 } |
|
122 |
|
123 if (ZLargePages::is_explicit() && !is_hugetlbfs()) { |
|
124 log_error(gc, init)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled when using a %s filesystem", ZFILESYSTEM_HUGETLBFS); |
|
125 return; |
|
126 } |
|
127 |
|
128 if (!ZLargePages::is_explicit() && is_hugetlbfs()) { |
|
129 log_error(gc, init)("-XX:+UseLargePages must be enabled when using a %s filesystem", ZFILESYSTEM_HUGETLBFS); |
|
130 return; |
|
131 } |
|
132 |
|
133 // Successfully initialized |
|
134 _initialized = true; |
|
135 } |
|
136 |
|
137 int ZBackingFile::create_mem_fd(const char* name) const { |
|
138 // Create file name |
|
139 char filename[PATH_MAX]; |
|
140 snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? ".hugetlb" : ""); |
|
141 |
|
142 // Create file |
|
143 const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0; |
|
144 const int fd = z_memfd_create(filename, MFD_CLOEXEC | extra_flags); |
|
145 if (fd == -1) { |
|
146 ZErrno err; |
|
147 log_debug(gc, init)("Failed to create memfd file (%s)", |
|
148 ((UseLargePages && err == EINVAL) ? "Hugepages not supported" : err.to_string())); |
|
149 return -1; |
|
150 } |
|
151 |
|
152 log_debug(gc, init)("Heap backed by file /memfd:%s", filename); |
|
153 |
|
154 return fd; |
|
155 } |
|
156 |
|
157 int ZBackingFile::create_file_fd(const char* name) const { |
|
158 const char* const filesystem = ZLargePages::is_explicit() ? ZFILESYSTEM_HUGETLBFS : ZFILESYSTEM_TMPFS; |
|
159 const char* const mountpoint = ZLargePages::is_explicit() ? ZMOUNTPOINT_HUGETLBFS : ZMOUNTPOINT_TMPFS; |
|
160 |
|
161 // Find mountpoint |
|
162 ZBackingPath path(filesystem, mountpoint); |
|
163 if (path.get() == NULL) { |
|
164 log_error(gc, init)("Use -XX:ZPath to specify the path to a %s filesystem", filesystem); |
|
165 return -1; |
|
166 } |
|
167 |
|
168 // Try to create an anonymous file using the O_TMPFILE flag. Note that this |
|
169 // flag requires kernel >= 3.11. If this fails we fall back to open/unlink. |
|
170 const int fd_anon = open(path.get(), O_TMPFILE|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR); |
|
171 if (fd_anon == -1) { |
|
172 ZErrno err; |
|
173 log_debug(gc, init)("Failed to create anonymouns file in %s (%s)", path.get(), |
|
174 (err == EINVAL ? "Not supported" : err.to_string())); |
|
175 } else { |
|
176 // Get inode number for anonymous file |
|
177 struct stat stat_buf; |
|
178 if (fstat(fd_anon, &stat_buf) == -1) { |
|
179 ZErrno err; |
|
180 log_error(gc, init)("Failed to determine inode number for anonymous file (%s)", err.to_string()); |
|
181 return -1; |
|
182 } |
|
183 |
|
184 log_debug(gc, init)("Heap backed by file %s/#" UINT64_FORMAT, path.get(), (uint64_t)stat_buf.st_ino); |
|
185 |
|
186 return fd_anon; |
|
187 } |
|
188 |
|
189 log_debug(gc, init)("Falling back to open/unlink"); |
|
190 |
|
191 // Create file name |
|
192 char filename[PATH_MAX]; |
|
193 snprintf(filename, sizeof(filename), "%s/%s.%d", path.get(), name, os::current_process_id()); |
|
194 |
|
195 // Create file |
|
196 const int fd = open(filename, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR); |
|
197 if (fd == -1) { |
|
198 ZErrno err; |
|
199 log_error(gc, init)("Failed to create file %s (%s)", filename, err.to_string()); |
|
200 return -1; |
|
201 } |
|
202 |
|
203 // Unlink file |
|
204 if (unlink(filename) == -1) { |
|
205 ZErrno err; |
|
206 log_error(gc, init)("Failed to unlink file %s (%s)", filename, err.to_string()); |
|
207 return -1; |
|
208 } |
|
209 |
|
210 log_debug(gc, init)("Heap backed by file %s", filename); |
|
211 |
|
212 return fd; |
|
213 } |
|
214 |
|
215 int ZBackingFile::create_fd(const char* name) const { |
|
216 if (ZPath == NULL) { |
|
217 // If the path is not explicitly specified, then we first try to create a memfd file |
|
218 // instead of looking for a tmpfd/hugetlbfs mount point. Note that memfd_create() might |
|
219 // not be supported at all (requires kernel >= 3.17), or it might not support large |
|
220 // pages (requires kernel >= 4.14). If memfd_create() fails, then we try to create a |
|
221 // file on an accessible tmpfs or hugetlbfs mount point. |
|
222 const int fd = create_mem_fd(name); |
|
223 if (fd != -1) { |
|
224 return fd; |
|
225 } |
|
226 |
|
227 log_debug(gc, init)("Falling back to searching for an accessible moint point"); |
|
228 } |
|
229 |
|
230 return create_file_fd(name); |
|
231 } |
|
232 |
|
233 bool ZBackingFile::is_initialized() const { |
|
234 return _initialized; |
|
235 } |
|
236 |
|
237 int ZBackingFile::fd() const { |
|
238 return _fd; |
|
239 } |
|
240 |
|
241 bool ZBackingFile::is_tmpfs() const { |
|
242 return _filesystem == TMPFS_MAGIC; |
|
243 } |
|
244 |
|
245 bool ZBackingFile::is_hugetlbfs() const { |
|
246 return _filesystem == HUGETLBFS_MAGIC; |
|
247 } |
|
248 |
|
249 bool ZBackingFile::tmpfs_supports_transparent_huge_pages() const { |
|
250 // If the shmem_enabled file exists and is readable then we |
|
251 // know the kernel supports transparent huge pages for tmpfs. |
|
252 return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0; |
|
253 } |
|
254 |
|
255 bool ZBackingFile::try_split_and_expand_tmpfs(size_t offset, size_t length, size_t alignment) const { |
|
256 // Try first smaller part. |
|
257 const size_t offset0 = offset; |
|
258 const size_t length0 = align_up(length / 2, alignment); |
|
259 if (!try_expand_tmpfs(offset0, length0, alignment)) { |
|
260 return false; |
|
261 } |
|
262 |
|
263 // Try second smaller part. |
|
264 const size_t offset1 = offset0 + length0; |
|
265 const size_t length1 = length - length0; |
|
266 if (!try_expand_tmpfs(offset1, length1, alignment)) { |
|
267 return false; |
|
268 } |
|
269 |
|
270 return true; |
|
271 } |
|
272 |
|
273 bool ZBackingFile::try_expand_tmpfs(size_t offset, size_t length, size_t alignment) const { |
|
274 assert(length > 0, "Invalid length"); |
|
275 assert(is_aligned(length, alignment), "Invalid length"); |
|
276 |
|
277 ZErrno err = posix_fallocate(_fd, offset, length); |
|
278 |
|
279 if (err == EINTR && length > alignment) { |
|
280 // Calling posix_fallocate() with a large length can take a long |
|
281 // time to complete. When running profilers, such as VTune, this |
|
282 // syscall will be constantly interrupted by signals. Expanding |
|
283 // the file in smaller steps avoids this problem. |
|
284 return try_split_and_expand_tmpfs(offset, length, alignment); |
|
285 } |
|
286 |
|
287 if (err) { |
|
288 log_error(gc)("Failed to allocate backing file (%s)", err.to_string()); |
|
289 return false; |
|
290 } |
|
291 |
|
292 return true; |
|
293 } |
|
294 |
|
295 bool ZBackingFile::expand_tmpfs(size_t offset, size_t length) const { |
|
296 assert(is_tmpfs(), "Wrong filesystem"); |
|
297 return try_expand_tmpfs(offset, length, os::vm_page_size()); |
|
298 } |
|
299 |
|
300 bool ZBackingFile::expand_hugetlbfs(size_t offset, size_t length) const { |
|
301 assert(is_hugetlbfs(), "Wrong filesystem"); |
|
302 |
|
303 // Prior to kernel 4.3, hugetlbfs did not support posix_fallocate(). |
|
304 // Instead of posix_fallocate() we can use a well-known workaround, |
|
305 // which involves truncating the file to requested size and then try |
|
306 // to map it to verify that there are enough huge pages available to |
|
307 // back it. |
|
308 while (ftruncate(_fd, offset + length) == -1) { |
|
309 ZErrno err; |
|
310 if (err != EINTR) { |
|
311 log_error(gc)("Failed to truncate backing file (%s)", err.to_string()); |
|
312 return false; |
|
313 } |
|
314 } |
|
315 |
|
316 // If we fail mapping during initialization, i.e. when we are pre-mapping |
|
317 // the heap, then we wait and retry a few times before giving up. Otherwise |
|
318 // there is a risk that running JVMs back-to-back will fail, since there |
|
319 // is a delay between process termination and the huge pages owned by that |
|
320 // process being returned to the huge page pool and made available for new |
|
321 // allocations. |
|
322 void* addr = MAP_FAILED; |
|
323 const int max_attempts = 3; |
|
324 for (int attempt = 1; attempt <= max_attempts; attempt++) { |
|
325 addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset); |
|
326 if (addr != MAP_FAILED || is_init_completed()) { |
|
327 // Mapping was successful or initialization phase has completed |
|
328 break; |
|
329 } |
|
330 |
|
331 ZErrno err; |
|
332 log_debug(gc)("Failed to map backing file (%s), attempt %d of %d", |
|
333 err.to_string(), attempt, max_attempts); |
|
334 |
|
335 // Wait and retry in one second, in the hope that |
|
336 // huge pages will be available by then. |
|
337 sleep(1); |
|
338 } |
|
339 |
|
340 if (addr == MAP_FAILED) { |
|
341 // Not enough huge pages left |
|
342 ZErrno err; |
|
343 log_error(gc)("Failed to map backing file (%s)", err.to_string()); |
|
344 return false; |
|
345 } |
|
346 |
|
347 // Successful mapping, unmap again. From now on the pages we mapped |
|
348 // will be reserved for this file. |
|
349 if (munmap(addr, length) == -1) { |
|
350 ZErrno err; |
|
351 log_error(gc)("Failed to unmap backing file (%s)", err.to_string()); |
|
352 return false; |
|
353 } |
|
354 |
|
355 return true; |
|
356 } |
|
357 |
|
358 bool ZBackingFile::expand(size_t offset, size_t length) const { |
|
359 return is_hugetlbfs() ? expand_hugetlbfs(offset, length) : expand_tmpfs(offset, length); |
|
360 } |