8217338: [Containers] Improve systemd slice memory limit support
author sgehwolf
Tue, 12 Mar 2019 10:43:27 +0100
changeset 54577 1c242c2d037f
parent 54576 aa626cbadf1b
child 54578 895a6a380484
8217338: [Containers] Improve systemd slice memory limit support Summary: Use hierarchical memory limit in addition to memory.limit_in_bytes Reviewed-by: bobv, dholmes
src/hotspot/os/linux/osContainer_linux.cpp
src/hotspot/os/linux/osContainer_linux.hpp
src/java.base/linux/classes/jdk/internal/platform/cgroupv1/Metrics.java
src/java.base/linux/classes/jdk/internal/platform/cgroupv1/SubSystem.java
--- a/src/hotspot/os/linux/osContainer_linux.cpp	Wed Apr 17 07:41:12 2019 +0200
+++ b/src/hotspot/os/linux/osContainer_linux.cpp	Tue Mar 12 10:43:27 2019 +0100
@@ -122,7 +122,25 @@
     char *subsystem_path() { return _path; }
 };
 
-CgroupSubsystem* memory = NULL;
+/* Memory controller: a CgroupSubsystem that additionally records whether
+ * limits may be imposed through the cgroup hierarchy. */
+class CgroupMemorySubsystem: CgroupSubsystem {
+ friend class OSContainer;
+
+ private:
+    /* When true, memory.stat is consulted for a hierarchical limit
+     * if the direct limit file reports unlimited. */
+    bool _uses_mem_hierarchy;
+
+ public:
+    // Hierarchical accounting is assumed off until set_hierarchical(true).
+    CgroupMemorySubsystem(char *root, char *mountpoint)
+      : CgroupSubsystem(root, mountpoint), _uses_mem_hierarchy(false) {}
+    void set_hierarchical(bool value) { _uses_mem_hierarchy = value; }
+    bool is_hierarchical() { return _uses_mem_hierarchy; }
+};
+
+CgroupMemorySubsystem* memory = NULL;
 CgroupSubsystem* cpuset = NULL;
 CgroupSubsystem* cpu = NULL;
 CgroupSubsystem* cpuacct = NULL;
@@ -131,21 +149,24 @@
 
 PRAGMA_DIAG_PUSH
 PRAGMA_FORMAT_NONLITERAL_IGNORED
-template <typename T> int subsystem_file_contents(CgroupSubsystem* c,
+template <typename T> int subsystem_file_line_contents(CgroupSubsystem* c,
                                               const char *filename,
+                                              const char *matchline,
                                               const char *scan_fmt,
                                               T returnval) {
   FILE *fp = NULL;
   char *p;
   char file[MAXPATHLEN+1];
   char buf[MAXPATHLEN+1];
+  char discard[MAXPATHLEN+1];
+  bool found_match = false;
 
   if (c == NULL) {
-    log_debug(os, container)("subsystem_file_contents: CgroupSubsytem* is NULL");
+    log_debug(os, container)("subsystem_file_line_contents: CgroupSubsytem* is NULL");
     return OSCONTAINER_ERROR;
   }
   if (c->subsystem_path() == NULL) {
-    log_debug(os, container)("subsystem_file_contents: subsystem path is NULL");
+    log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL");
     return OSCONTAINER_ERROR;
   }
 
@@ -160,16 +181,32 @@
   log_trace(os, container)("Path to %s is %s", filename, file);
   fp = fopen(file, "r");
   if (fp != NULL) {
-    p = fgets(buf, MAXPATHLEN, fp);
-    if (p != NULL) {
-      int matched = sscanf(p, scan_fmt, returnval);
-      if (matched == 1) {
+    int err = 0;
+    while ((p = fgets(buf, MAXPATHLEN, fp)) != NULL) {
+      found_match = false;
+      if (matchline == NULL) {
+        // single-line file case
+        int matched = sscanf(p, scan_fmt, returnval);
+        found_match = (matched == 1);
+      } else {
+        // multi-line file case
+        if (strstr(p, matchline) != NULL) {
+          // discard matchline string prefix
+          int matched = sscanf(p, scan_fmt, discard, returnval);
+          found_match = (matched == 2);
+        } else {
+          continue; // substring not found
+        }
+      }
+      if (found_match) {
         fclose(fp);
         return 0;
       } else {
+        err = 1;
         log_debug(os, container)("Type %s not found in file %s", scan_fmt, file);
       }
-    } else {
+    }
+    if (err == 0) {
       log_debug(os, container)("Empty file %s", file);
     }
   } else {
@@ -186,10 +223,11 @@
   return_type variable;                                                   \
 {                                                                         \
   int err;                                                                \
-  err = subsystem_file_contents(subsystem,                                \
-                                filename,                                 \
-                                scan_fmt,                                 \
-                                &variable);                               \
+  err = subsystem_file_line_contents(subsystem,                           \
+                                     filename,                            \
+                                     NULL,                                \
+                                     scan_fmt,                            \
+                                     &variable);                          \
   if (err != 0)                                                           \
     return (return_type) OSCONTAINER_ERROR;                               \
                                                                           \
@@ -201,16 +239,33 @@
   char variable[bufsize];                                                 \
 {                                                                         \
   int err;                                                                \
-  err = subsystem_file_contents(subsystem,                                \
-                                filename,                                 \
-                                scan_fmt,                                 \
-                                variable);                                \
+  err = subsystem_file_line_contents(subsystem,                           \
+                                     filename,                            \
+                                     NULL,                                \
+                                     scan_fmt,                            \
+                                     variable);                           \
   if (err != 0)                                                           \
     return (return_type) NULL;                                            \
                                                                           \
   log_trace(os, container)(logstring, variable);                          \
 }
 
+#define GET_CONTAINER_INFO_LINE(return_type, subsystem, filename,         \
+                           matchline, logstring, scan_fmt, variable)      \
+  return_type variable; /* declared in the expanding scope */             \
+{                                                                         \
+  int err;                                                                \
+  err = subsystem_file_line_contents(subsystem,                           \
+                                filename,                                 \
+                                matchline, /* selects the line to scan */ \
+                                scan_fmt,                                 \
+                                &variable);                               \
+  if (err != 0) /* returns from the enclosing function */                 \
+    return (return_type) OSCONTAINER_ERROR;                               \
+                                                                          \
+  log_trace(os, container)(logstring, variable);                          \
+}
+
 /* init
  *
  * Initialize the container support and determine if
@@ -266,7 +321,7 @@
     }
     while ((token = strsep(&cptr, ",")) != NULL) {
       if (strcmp(token, "memory") == 0) {
-        memory = new CgroupSubsystem(tmproot, tmpmount);
+        memory = new CgroupMemorySubsystem(tmproot, tmpmount);
       } else if (strcmp(token, "cpuset") == 0) {
         cpuset = new CgroupSubsystem(tmproot, tmpmount);
       } else if (strcmp(token, "cpu") == 0) {
@@ -344,6 +399,10 @@
     while ((token = strsep(&controllers, ",")) != NULL) {
       if (strcmp(token, "memory") == 0) {
         memory->set_subsystem_path(base);
+        jlong hierarchy = uses_mem_hierarchy();
+        if (hierarchy > 0) {
+          memory->set_hierarchical(true);
+        }
       } else if (strcmp(token, "cpuset") == 0) {
         cpuset->set_subsystem_path(base);
       } else if (strcmp(token, "cpu") == 0) {
@@ -360,6 +419,7 @@
   // command line arguments have been processed.
   if ((mem_limit = memory_limit_in_bytes()) > 0) {
     os::Linux::set_physical_memory(mem_limit);
+    log_info(os, container)("Memory Limit is: " JLONG_FORMAT, mem_limit);
   }
 
   _is_containerized = true;
@@ -374,6 +434,21 @@
   }
 }
 
+/* uses_mem_hierarchy
+ *
+ * Return whether or not hierarchical cgroup accounting is being
+ * done, as scanned from the memory.use_hierarchy file.
+ *
+ * return:
+ *    The value scanned from memory.use_hierarchy (the caller treats
+ *    a number > 0 as enabled), or OSCONTAINER_ERROR for not supported
+ */
+jlong OSContainer::uses_mem_hierarchy() {
+  GET_CONTAINER_INFO(jlong, memory, "/memory.use_hierarchy",
+                    "Use Hierarchy is: " JLONG_FORMAT, JLONG_FORMAT, use_hierarchy);
+  return use_hierarchy;
+}
+
 
 /* memory_limit_in_bytes
  *
@@ -389,7 +464,18 @@
                      "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit);
 
   if (memlimit >= _unlimited_memory) {
-    log_trace(os, container)("Memory Limit is: Unlimited");
+    log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited");
+    if (memory->is_hierarchical()) {
+      const char* matchline = "hierarchical_memory_limit";
+      char* format = "%s " JULONG_FORMAT;
+      GET_CONTAINER_INFO_LINE(julong, memory, "/memory.stat", matchline,
+                             "Hierarchical Memory Limit is: " JULONG_FORMAT, format, hier_memlimit)
+      if (hier_memlimit >= _unlimited_memory) {
+        log_trace(os, container)("Hierarchical Memory Limit is: Unlimited");
+      } else {
+        return (jlong)hier_memlimit;
+      }
+    }
     return (jlong)-1;
   }
   else {
@@ -401,7 +487,18 @@
   GET_CONTAINER_INFO(julong, memory, "/memory.memsw.limit_in_bytes",
                      "Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit);
   if (memswlimit >= _unlimited_memory) {
-    log_trace(os, container)("Memory and Swap Limit is: Unlimited");
+    log_trace(os, container)("Non-Hierarchical Memory and Swap Limit is: Unlimited");
+    if (memory->is_hierarchical()) {
+      const char* matchline = "hierarchical_memsw_limit";
+      char* format = "%s " JULONG_FORMAT;
+      GET_CONTAINER_INFO_LINE(julong, memory, "/memory.stat", matchline,
+                             "Hierarchical Memory and Swap Limit is : " JULONG_FORMAT, format, hier_memlimit)
+      if (hier_memlimit >= _unlimited_memory) {
+        log_trace(os, container)("Hierarchical Memory and Swap Limit is: Unlimited");
+      } else {
+        return (jlong)hier_memlimit;
+      }
+    }
     return (jlong)-1;
   } else {
     return (jlong)memswlimit;
--- a/src/hotspot/os/linux/osContainer_linux.hpp	Wed Apr 17 07:41:12 2019 +0200
+++ b/src/hotspot/os/linux/osContainer_linux.hpp	Tue Mar 12 10:43:27 2019 +0100
@@ -42,6 +42,7 @@
   static inline bool is_containerized();
   static const char * container_type();
 
+  static jlong uses_mem_hierarchy();
   static jlong memory_limit_in_bytes();
   static jlong memory_and_swap_limit_in_bytes();
   static jlong memory_soft_limit_in_bytes();
--- a/src/java.base/linux/classes/jdk/internal/platform/cgroupv1/Metrics.java	Wed Apr 17 07:41:12 2019 +0200
+++ b/src/java.base/linux/classes/jdk/internal/platform/cgroupv1/Metrics.java	Tue Mar 12 10:43:27 2019 +0100
@@ -25,15 +25,16 @@
 
 package jdk.internal.platform.cgroupv1;
 
-import java.io.BufferedReader;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.stream.Stream;
 
+import jdk.internal.platform.cgroupv1.SubSystem.MemorySubSystem;
+
 public class Metrics implements jdk.internal.platform.Metrics {
-    private SubSystem memory;
+    private MemorySubSystem memory;
     private SubSystem cpu;
     private SubSystem cpuacct;
     private SubSystem cpuset;
@@ -133,7 +134,7 @@
         for (String subsystemName: subsystemNames) {
             switch (subsystemName) {
                 case "memory":
-                    metric.setMemorySubSystem(new SubSystem(mountentry[3], mountentry[4]));
+                    metric.setMemorySubSystem(new MemorySubSystem(mountentry[3], mountentry[4]));
                     break;
                 case "cpuset":
                     metric.setCpuSetSubSystem(new SubSystem(mountentry[3], mountentry[4]));
@@ -195,6 +196,11 @@
 
         if (subsystem != null) {
             subsystem.setPath(base);
+            if (subsystem instanceof MemorySubSystem) {
+                MemorySubSystem memorySubSystem = (MemorySubSystem)subsystem;
+                boolean isHierarchial = getHierarchical(memorySubSystem);
+                memorySubSystem.setHierarchical(isHierarchial);
+            }
             metric.setActiveSubSystems();
         }
         if (subsystem2 != null) {
@@ -203,6 +209,11 @@
     }
 
 
+    // Hierarchical accounting is on when memory.use_hierarchy reads > 0.
+    private static boolean getHierarchical(MemorySubSystem subsystem) {
+        return SubSystem.getLongValue(subsystem, "memory.use_hierarchy") > 0;
+    }
+
     private void setActiveSubSystems() {
         activeSubSystems = true;
     }
@@ -211,7 +222,7 @@
         return activeSubSystems;
     }
 
-    private void setMemorySubSystem(SubSystem memory) {
+    private void setMemorySubSystem(MemorySubSystem memory) {
         this.memory = memory;
     }
 
@@ -366,9 +377,29 @@
 
     public long getMemoryLimit() {
         long retval = SubSystem.getLongValue(memory, "memory.limit_in_bytes");
+        if (retval > unlimited_minimum) {
+            if (memory.isHierarchical()) {
+                // memory.limit_in_bytes reported unlimited; fall back to the
+                // hierarchical_memory_limit entry of memory.stat
+                String match = "hierarchical_memory_limit";
+                retval = SubSystem.getLongValueMatchingLine(memory,
+                                                            "memory.stat",
+                                                            match,
+                                                            Metrics::convertHierachicalLimitLine);
+            }
+        }
         return retval > unlimited_minimum ? -1L : retval;
     }
 
+    /** Parses the value of a "key value" line; malformed lines mean unlimited. */
+    public static long convertHierachicalLimitLine(String line) {
+        String[] parts = line.split("\\s");
+        if (parts.length != 2) {
+            return unlimited_minimum + 1; // unlimited
+        }
+        return SubSystem.convertStringToLong(parts[1]);
+    }
+
     public long getMemoryMaxUsage() {
         return SubSystem.getLongValue(memory, "memory.max_usage_in_bytes");
     }
@@ -417,6 +448,17 @@
 
     public long getMemoryAndSwapLimit() {
         long retval = SubSystem.getLongValue(memory, "memory.memsw.limit_in_bytes");
+        if (retval > unlimited_minimum) {
+            if (memory.isHierarchical()) {
+                // memory.memsw.limit_in_bytes reported unlimited; fall back to
+                // the hierarchical_memsw_limit entry of memory.stat
+                String match = "hierarchical_memsw_limit";
+                retval = SubSystem.getLongValueMatchingLine(memory,
+                                                            "memory.stat",
+                                                            match,
+                                                            Metrics::convertHierachicalLimitLine);
+            }
+        }
         return retval > unlimited_minimum ? -1L : retval;
     }
 
--- a/src/java.base/linux/classes/jdk/internal/platform/cgroupv1/SubSystem.java	Wed Apr 17 07:41:12 2019 +0200
+++ b/src/java.base/linux/classes/jdk/internal/platform/cgroupv1/SubSystem.java	Tue Mar 12 10:43:27 2019 +0100
@@ -29,10 +29,11 @@
 import java.io.IOException;
 import java.math.BigInteger;
 import java.nio.file.Files;
-import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.ArrayList;
+import java.util.List;
 import java.util.Optional;
+import java.util.function.Function;
 import java.util.stream.Stream;
 
 public class SubSystem {
@@ -99,10 +100,32 @@
 
     }
 
+    /** Applies conversion to the first line of file param that contains match;
+     *  returns an unlimited value if no line matches or reading fails. */
+    public static long getLongValueMatchingLine(SubSystem subsystem,
+                                                     String param,
+                                                     String match,
+                                                     Function<String, Long> conversion) {
+        try {
+            List<String> content = Files.readAllLines(Paths.get(subsystem.path(), param));
+            for (String candidate : content) {
+                if (candidate.contains(match)) {
+                    return conversion.apply(candidate);
+                }
+            }
+        } catch (IOException ignored) {
+            // Ignore. Default is unlimited.
+        }
+        return Metrics.unlimited_minimum + 1; // default unlimited
+    }
+
     public static long getLongValue(SubSystem subsystem, String parm) {
         String strval = getStringValue(subsystem, parm);
+        return convertStringToLong(strval); // a null strval converts to 0
+    }
+
+    public static long convertStringToLong(String strval) {
         long retval = 0;
-
         if (strval == null) return 0L;
 
         try {
@@ -215,4 +238,22 @@
 
         return ints;
     }
+
+    /** Memory controller subsystem; also records hierarchical accounting. */
+    public static class MemorySubSystem extends SubSystem {
+        // Whether hierarchical accounting applies; false until set.
+        private boolean hierarchical;
+
+        public MemorySubSystem(String root, String mountPoint) {
+            super(root, mountPoint);
+        }
+
+        void setHierarchical(boolean hierarchical) {
+            this.hierarchical = hierarchical;
+        }
+
+        boolean isHierarchical() {
+            return hierarchical;
+        }
+    }
 }