8077276: allocating heap with UseLargePages and HugeTLBFS may trash existing memory mappings (linux)
author stuefe
Thu, 23 Apr 2015 18:00:50 +0200
changeset 30605 94be2f15efe5
parent 30604 b8d532cb6420
child 30606 9dae6b41c617
8077276: allocating heap with UseLargePages and HugeTLBFS may trash existing memory mappings (linux) Summary: Remove MAP_FIXED from initial mapping allocation; add tests Reviewed-by: stefank, coleenp
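The bug: per the summary line, the old code path (os::reserve_memory(bytes, req_addr), visible in the removed lines below) ended up mapping the initial small-page reservation at req_addr with MAP_FIXED, and mmap(MAP_FIXED) does not fail when the requested range overlaps an existing mapping - it silently discards whatever was mapped there. A minimal standalone sketch (not part of this changeset) of that mmap behavior; the size and the "victim" mapping are purely illustrative:

// Standalone sketch: MAP_FIXED at an occupied address silently replaces the
// existing mapping instead of failing, which is how the heap reservation
// could trash unrelated memory.
#include <sys/mman.h>
#include <stdio.h>
#include <string.h>

int main() {
  const size_t sz = 2 * 1024 * 1024;

  // An unrelated, pre-existing mapping owned by "someone else" in the process.
  char* victim = (char*) mmap(NULL, sz, PROT_READ | PROT_WRITE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (victim == MAP_FAILED) { perror("mmap"); return 1; }
  memset(victim, 'A', sz);

  // A MAP_FIXED request at the same address does not fail - it replaces the
  // victim mapping, and the data written above is lost (the new anonymous
  // mapping is zero-filled).
  char* p = (char*) mmap(victim, sz, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
  if (p == MAP_FAILED) { perror("mmap MAP_FIXED"); return 1; }
  printf("victim content after MAP_FIXED remap: %d (was 'A' == %d)\n",
         victim[0], 'A');

  munmap(p, sz);
  return 0;
}

With the fix, req_addr is passed to mmap() only as a hint (no MAP_FIXED), and the new helper anon_mmap_aligned() below gives the mapping back and returns NULL if the kernel places it anywhere other than req_addr, so no foreign mapping can be destroyed.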
hotspot/src/os/linux/vm/os_linux.cpp
--- a/hotspot/src/os/linux/vm/os_linux.cpp	Mon May 04 16:30:07 2015 +0200
+++ b/hotspot/src/os/linux/vm/os_linux.cpp	Thu Apr 23 18:00:50 2015 +0200
@@ -3474,25 +3474,69 @@
   return addr;
 }
 
+// Helper for os::Linux::reserve_memory_special_huge_tlbfs_mixed().
+// Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address
+//   (req_addr != NULL) or with a given alignment.
+//  - bytes shall be a multiple of alignment.
+//  - req_addr can be NULL. If not NULL, it must be a multiple of alignment.
+//  - alignment sets the alignment at which memory shall be allocated.
+//     It must be a multiple of allocation granularity.
+// Returns address of memory or NULL. If req_addr was not NULL, will only return
+//  req_addr or NULL.
+static char* anon_mmap_aligned(size_t bytes, size_t alignment, char* req_addr) {
+
+  size_t extra_size = bytes;
+  if (req_addr == NULL && alignment > 0) {
+    extra_size += alignment;
+  }
+
+  char* start = (char*) ::mmap(req_addr, extra_size, PROT_NONE,
+    MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE,
+    -1, 0);
+  if (start == MAP_FAILED) {
+    start = NULL;
+  } else {
+    if (req_addr != NULL) {
+      if (start != req_addr) {
+        ::munmap(start, extra_size);
+        start = NULL;
+      }
+    } else {
+      char* const start_aligned = (char*) align_ptr_up(start, alignment);
+      char* const end_aligned = start_aligned + bytes;
+      char* const end = start + extra_size;
+      if (start_aligned > start) {
+        ::munmap(start, start_aligned - start);
+      }
+      if (end_aligned < end) {
+        ::munmap(end_aligned, end - end_aligned);
+      }
+      start = start_aligned;
+    }
+  }
+  return start;
+
+}
+
+// Reserve memory using mmap(MAP_HUGETLB).
+//  - bytes shall be a multiple of alignment.
+//  - req_addr can be NULL. If not NULL, it must be a multiple of alignment.
+//  - alignment sets the alignment at which memory shall be allocated.
+//     It must be a multiple of allocation granularity.
+// Returns address of memory or NULL. If req_addr was not NULL, will only return
+//  req_addr or NULL.
 char* os::Linux::reserve_memory_special_huge_tlbfs_mixed(size_t bytes,
                                                          size_t alignment,
                                                          char* req_addr,
                                                          bool exec) {
   size_t large_page_size = os::large_page_size();
-
   assert(bytes >= large_page_size, "Shouldn't allocate large pages for small sizes");
 
-  // Allocate small pages.
-
-  char* start;
-  if (req_addr != NULL) {
-    assert(is_ptr_aligned(req_addr, alignment), "Must be");
-    assert(is_size_aligned(bytes, alignment), "Must be");
-    start = os::reserve_memory(bytes, req_addr);
-    assert(start == NULL || start == req_addr, "Must be");
-  } else {
-    start = os::reserve_memory_aligned(bytes, alignment);
-  }
+  assert(is_ptr_aligned(req_addr, alignment), "Must be");
+  assert(is_size_aligned(bytes, alignment), "Must be");
+
+  // First reserve - but not commit - the address range in small pages.
+  char* const start = anon_mmap_aligned(bytes, alignment, req_addr);
 
   if (start == NULL) {
     return NULL;
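anon_mmap_aligned() above implements the usual over-reserve-and-trim approach: when no request address is given it reserves alignment extra bytes, picks the first aligned address inside the raw mapping, and unmaps the slack before and after it. A minimal standalone sketch of the same idea, assuming bytes and alignment are page multiples and alignment is a power of two (HotSpot's align_ptr_up() is replaced by an explicit mask here):

// Standalone sketch of the over-reserve-and-trim technique used by
// anon_mmap_aligned() when req_addr is NULL; names and sizes are illustrative.
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>

static char* reserve_aligned(size_t bytes, size_t alignment) {
  // Reserve 'alignment' extra bytes so an aligned start is guaranteed to
  // exist inside the raw mapping. PROT_NONE + MAP_NORESERVE reserves address
  // space only; nothing is committed yet.
  size_t extra = bytes + alignment;
  char* raw = (char*) mmap(NULL, extra, PROT_NONE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (raw == MAP_FAILED) return NULL;

  uintptr_t a = (uintptr_t) raw;
  char* aligned = (char*) ((a + alignment - 1) & ~(uintptr_t)(alignment - 1));
  char* end = raw + extra;
  char* aligned_end = aligned + bytes;

  // Give back the unused slack before and after the aligned window.
  if (aligned > raw) {
    munmap(raw, aligned - raw);
  }
  if (aligned_end < end) {
    munmap(aligned_end, end - aligned_end);
  }
  return aligned;
}

int main() {
  const size_t one_mb = 1024 * 1024;
  char* p = reserve_aligned(8 * one_mb, 4 * one_mb);
  printf("reserved at %p\n", (void*) p);
  if (p != NULL) munmap(p, 8 * one_mb);
  return 0;
}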
@@ -3500,13 +3544,6 @@
 
   assert(is_ptr_aligned(start, alignment), "Must be");
 
-  if (MemTracker::tracking_level() > NMT_minimal) {
-    // os::reserve_memory_special will record this memory area.
-    // Need to release it here to prevent overlapping reservations.
-    Tracker tkr = MemTracker::get_virtual_memory_release_tracker();
-    tkr.record((address)start, bytes);
-  }
-
   char* end = start + bytes;
 
   // Find the regions of the allocated chunk that can be promoted to large pages.
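The code following this comment (unchanged by the patch, hence mostly absent from the hunks) derives lp_start/lp_end, the subrange of [start, start + bytes) that can be backed by huge pages. A sketch of that computation, assuming a power-of-two large page size; the example addresses are arbitrary and the variable names simply mirror the hunks below:

// Sketch of how the large-page-eligible subrange [lp_start, lp_end) falls out
// of the small-page reservation [start, end); assumes the large page size is
// a power of two.
#include <stdint.h>
#include <stdio.h>

int main() {
  const uintptr_t large_page_size = 2u * 1024 * 1024;   // e.g. 2 MB huge pages
  uintptr_t start = 0x40001000;                          // example reservation
  uintptr_t end   = start + 5 * 1024 * 1024;

  uintptr_t lp_start = (start + large_page_size - 1) & ~(large_page_size - 1);
  uintptr_t lp_end   = end & ~(large_page_size - 1);

  // [start, lp_start) and [lp_end, end) stay small-paged; only the aligned
  // middle part is remapped with MAP_HUGETLB.
  printf("small head: %zu bytes, huge middle: %zu bytes, small tail: %zu bytes\n",
         (size_t)(lp_start - start), (size_t)(lp_end - lp_start),
         (size_t)(end - lp_end));
  return 0;
}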
@@ -3526,9 +3563,9 @@
 
   int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
 
-
   void* result;
 
+  // Commit small-paged leading area.
   if (start != lp_start) {
     result = ::mmap(start, lp_start - start, prot,
                     MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED,
@@ -3539,11 +3576,12 @@
     }
   }
 
+  // Commit large-paged area.
   result = ::mmap(lp_start, lp_bytes, prot,
                   MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|MAP_HUGETLB,
                   -1, 0);
   if (result == MAP_FAILED) {
-    warn_on_large_pages_failure(req_addr, bytes, errno);
+    warn_on_large_pages_failure(lp_start, lp_bytes, errno);
     // If the mmap above fails, the large pages region will be unmapped and we
     // have regions before and after with small pages. Release these regions.
     //
@@ -3556,6 +3594,7 @@
     return NULL;
   }
 
+  // Commit small-paged trailing area.
   if (lp_end != end) {
     result = ::mmap(lp_end, end - lp_end, prot,
                     MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED,
@@ -3575,7 +3614,7 @@
                                                    bool exec) {
   assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages");
   assert(is_ptr_aligned(req_addr, alignment), "Must be");
-  assert(is_power_of_2(alignment), "Must be");
+  assert(is_size_aligned(alignment, os::vm_allocation_granularity()), "Must be");
   assert(is_power_of_2(os::large_page_size()), "Must be");
   assert(bytes >= os::large_page_size(), "Shouldn't allocate large pages for small sizes");
 
@@ -6086,47 +6125,100 @@
     }
   }
 
-  static void test_reserve_memory_special_huge_tlbfs_mixed(size_t size, size_t alignment) {
-    if (!UseHugeTLBFS) {
-      return;
-    }
-
-    test_log("test_reserve_memory_special_huge_tlbfs_mixed(" SIZE_FORMAT ", " SIZE_FORMAT ")",
-             size, alignment);
-
-    assert(size >= os::large_page_size(), "Incorrect input to test");
-
-    char* addr = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, NULL, false);
-
-    if (addr != NULL) {
-      small_page_write(addr, size);
-
-      os::Linux::release_memory_special_huge_tlbfs(addr, size);
-    }
-  }
-
-  static void test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(size_t size) {
-    size_t lp = os::large_page_size();
-    size_t ag = os::vm_allocation_granularity();
-
-    for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) {
-      test_reserve_memory_special_huge_tlbfs_mixed(size, alignment);
-    }
-  }
-
   static void test_reserve_memory_special_huge_tlbfs_mixed() {
     size_t lp = os::large_page_size();
     size_t ag = os::vm_allocation_granularity();
 
-    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp);
-    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp + ag);
-    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp + lp / 2);
-    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2);
-    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 + ag);
-    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 - ag);
-    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 + lp / 2);
-    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 10);
-    test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 10 + lp / 2);
+    // sizes to test
+    const size_t sizes[] = {
+      lp, lp + ag, lp + lp / 2, lp * 2,
+      lp * 2 + ag, lp * 2 - ag, lp * 2 + lp / 2,
+      lp * 10, lp * 10 + lp / 2
+    };
+    const int num_sizes = sizeof(sizes) / sizeof(size_t);
+
+    // For each size/alignment combination, we test three scenarios:
+    // 1) with req_addr == NULL
+    // 2) with a non-null req_addr at which we expect to successfully allocate
+    // 3) with a non-null req_addr which contains a pre-existing mapping, at which we
+    //    expect the allocation to either fail or to ignore req_addr
+
+    // Pre-allocate two areas; they shall be as large as the largest allocation
+    //  and aligned to the largest alignment we will be testing.
+    const size_t mapping_size = sizes[num_sizes - 1] * 2;
+    char* const mapping1 = (char*) ::mmap(NULL, mapping_size,
+      PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE,
+      -1, 0);
+    assert(mapping1 != MAP_FAILED, "should work");
+
+    char* const mapping2 = (char*) ::mmap(NULL, mapping_size,
+      PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE,
+      -1, 0);
+    assert(mapping2 != MAP_FAILED, "should work");
+
+    // Unmap the first mapping, but leave the second mapping intact: the first
+    // mapping will serve as a value for a "good" req_addr (case 2). The second
+    // mapping, still intact, as "bad" req_addr (case 3).
+    ::munmap(mapping1, mapping_size);
+
+    // Case 1
+    test_log("%s, req_addr NULL:", __FUNCTION__);
+    test_log("size            align           result");
+
+    for (int i = 0; i < num_sizes; i++) {
+      const size_t size = sizes[i];
+      for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) {
+        char* p = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, NULL, false);
+        test_log(SIZE_FORMAT_HEX " " SIZE_FORMAT_HEX " ->  " PTR_FORMAT " %s",
+            size, alignment, p, (p != NULL ? "" : "(failed)"));
+        if (p != NULL) {
+          assert(is_ptr_aligned(p, alignment), "must be");
+          small_page_write(p, size);
+          os::Linux::release_memory_special_huge_tlbfs(p, size);
+        }
+      }
+    }
+
+    // Case 2
+    test_log("%s, req_addr non-NULL:", __FUNCTION__);
+    test_log("size            align           req_addr         result");
+
+    for (int i = 0; i < num_sizes; i++) {
+      const size_t size = sizes[i];
+      for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) {
+        char* const req_addr = (char*) align_ptr_up(mapping1, alignment);
+        char* p = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, req_addr, false);
+        test_log(SIZE_FORMAT_HEX " " SIZE_FORMAT_HEX " " PTR_FORMAT " ->  " PTR_FORMAT " %s",
+            size, alignment, req_addr, p,
+            ((p != NULL ? (p == req_addr ? "(exact match)" : "") : "(failed)")));
+        if (p != NULL) {
+          assert(p == req_addr, "must be");
+          small_page_write(p, size);
+          os::Linux::release_memory_special_huge_tlbfs(p, size);
+        }
+      }
+    }
+
+    // Case 3
+    test_log("%s, req_addr non-NULL with preexisting mapping:", __FUNCTION__);
+    test_log("size            align           req_addr         result");
+
+    for (int i = 0; i < num_sizes; i++) {
+      const size_t size = sizes[i];
+      for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) {
+        char* const req_addr = (char*) align_ptr_up(mapping2, alignment);
+        char* p = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, req_addr, false);
+        test_log(SIZE_FORMAT_HEX " " SIZE_FORMAT_HEX " " PTR_FORMAT " ->  " PTR_FORMAT " %s",
+            size, alignment, req_addr, p,
+            ((p != NULL ? "" : "(failed)")));
+        // as the area around req_addr contains already existing mappings, the API should always
+        // return NULL (as per contract, it cannot return another address)
+        assert(p == NULL, "must be");
+      }
+    }
+
+    ::munmap(mapping2, mapping_size);
+
   }
 
   static void test_reserve_memory_special_huge_tlbfs() {
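The rewritten test covers the regression directly: case 1 exercises allocation without a request address, case 2 a request address known to be free (mapping1, unmapped again before the loops), and case 3 a request address that is still occupied (mapping2), where the fixed code must return NULL instead of trashing the existing mapping. A standalone sketch of the behavior case 3 relies on - without MAP_FIXED, an occupied hint makes the kernel place the mapping elsewhere, which a caller promising "req_addr or NULL" must then release and report as failure; sizes are illustrative:

// Standalone sketch of the contract exercised by case 3: when the requested
// range is already occupied, a reservation that may not relocate must report
// failure rather than return some other address.
#include <sys/mman.h>
#include <stdio.h>

int main() {
  const size_t sz = 4 * 1024 * 1024;

  // Pre-existing mapping playing the role of mapping2 in the test above.
  char* taken = (char*) mmap(NULL, sz, PROT_NONE,
                             MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (taken == MAP_FAILED) { perror("mmap"); return 1; }

  // Ask for the occupied address without MAP_FIXED: the kernel leaves 'taken'
  // alone and returns some other address instead.
  char* p = (char*) mmap(taken, sz, PROT_NONE,
                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (p != MAP_FAILED && p != taken) {
    // A caller that promised "req_addr or NULL" must back out here.
    munmap(p, sz);
    p = NULL;
  }
  printf("request at occupied address %p -> %p\n", (void*) taken, (void*) p);

  munmap(taken, sz);
  return 0;
}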