proc: Fix swap handling for cgroups v2 (can_use_swap)

author Alex Hudspith <alex@hudspith.io>

Mon, 6 Nov 2023 09:17:38 +0000 (09:17 +0000)

committer Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>

Wed, 27 Mar 2024 12:38:32 +0000 (13:38 +0100)
author Alex Hudspith <alex@hudspith.io>
Mon, 6 Nov 2023 09:17:38 +0000 (09:17 +0000)
committer Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Wed, 27 Mar 2024 12:38:32 +0000 (13:38 +0100)
diff --git a/src/bindings.c b/src/bindings.c

index 27c08c38616e93221088f09f50724eb54f606bf1..dc0550cadc383d2a2a2adf410587f0d36ff7a11c 100644 (file)
--- a/src/bindings.c
+++ b/src/bindings.c
@@ -866,6 +866,7 @@ static void __attribute__((constructor)) lxcfs_init(void)
  {
         __do_close int init_ns = -EBADF, root_fd = -EBADF,
                                   pidfd = -EBADF;
+       __do_free char *cgroup = NULL;
         int i = 0;
         pid_t pid;
         struct hierarchy *hierarchy;
@@ -920,7 +921,8 @@ static void __attribute__((constructor)) lxcfs_init(void)
                 lxcfs_info("Kernel supports pidfds");
         }
  
-       can_use_swap = cgroup_ops->can_use_swap(cgroup_ops);
+       cgroup = get_pid_cgroup(pid, "memory");
+       can_use_swap = cgroup && cgroup_ops->can_use_swap(cgroup_ops, cgroup);
         if (can_use_swap)
                 lxcfs_info("Kernel supports swap accounting");
         else
diff --git a/src/cgroups/cgfsng.c b/src/cgroups/cgfsng.c

index 2d583c67d2aa38bb5303fc15fc8609fa5b3eaeaf..7b732926b6250406786d89c927e948f7e9063a67 100644 (file)
--- a/src/cgroups/cgfsng.c
+++ b/src/cgroups/cgfsng.c
@@ -631,34 +631,25 @@ static int cgfsng_get_memory_slabinfo_fd(struct cgroup_ops *ops, const char *cgr
         return openat(h->fd, path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW);
  }
  
-static bool cgfsng_can_use_swap(struct cgroup_ops *ops)
+static bool cgfsng_can_use_swap(struct cgroup_ops *ops, const char *cgroup)
  {
-       bool has_swap = false;
+       __do_free char *cgroup_rel = NULL, *junk_value = NULL;
+       const char *file;
         struct hierarchy *h;
  
         h = ops->get_hierarchy(ops, "memory");
         if (!h)
                 return false;
  
-       if (is_unified_hierarchy(h)) {
-               if (faccessat(h->fd, "memory.swap.max", F_OK, 0))
-                       return false;
-
-               if (faccessat(h->fd, "memory.swap.current", F_OK, 0))
-                       return false;
-
-               has_swap = true;
-       } else {
-               if (faccessat(h->fd, "memory.memsw.limit_in_bytes", F_OK, 0))
-                       return false;
-
-               if (faccessat(h->fd, "memory.memsw.usage_in_bytes", F_OK, 0))
-                       return false;
-
-               has_swap = true;
-       }
-
-       return has_swap;
+       cgroup_rel = must_make_path_relative(cgroup, NULL);
+       file = is_unified_hierarchy(h) ? "memory.swap.current" : "memory.memsw.usage_in_bytes";
+       /* For v2, we need to look at the lower levels of the hierarchy because
+        * no 'memory.swap.current' file exists at the root. We must search
+        * upwards in the hierarchy in case memory accounting is disabled via
+        * cgroup.subtree_control for the given cgroup itself.
+        */
+       int ret = cgroup_walkup_to_root(ops->cgroup2_root_fd, h->fd, cgroup_rel, file, &junk_value);
+       return ret == 0;
  }
  
  static int cgfsng_get_memory_stats(struct cgroup_ops *ops, const char *cgroup,
diff --git a/src/cgroups/cgroup.h b/src/cgroups/cgroup.h

index 122e8ebfb80d1df86ca983c38996641bfce87ec6..afa7db2e422195380ec7edaefcde9957aca15e8a 100644 (file)
--- a/src/cgroups/cgroup.h
+++ b/src/cgroups/cgroup.h
@@ -148,7 +148,7 @@ struct cgroup_ops {
                                    char **value);
         int (*get_memory_slabinfo_fd)(struct cgroup_ops *ops,
                                       const char *cgroup);
-       bool (*can_use_swap)(struct cgroup_ops *ops);
+       bool (*can_use_swap)(struct cgroup_ops *ops, const char *cgroup);
  
         /* cpuset */
         int (*get_cpuset_cpus)(struct cgroup_ops *ops, const char *cgroup,
diff --git a/src/proc_fuse.c b/src/proc_fuse.c

index cb2fca2e832c12635f0cdd30a28cf55467c64508..9dedc37576d8c4d57a8a6a3bf3a1baf5861d1f34 100644 (file)
--- a/src/proc_fuse.c
+++ b/src/proc_fuse.c
@@ -459,11 +459,13 @@ static int proc_swaps_read(char *buf, size_t size, off_t offset,
         }
  
         if (wants_swap) {
-               /* The total amount of swap is always reported to be the
+               /* For cgroups v1, the total amount of swap is always reported to be the
                    lesser of the RAM+SWAP limit or the SWAP device size.
                    This is because the kernel can swap as much as it
                    wants and not only up to swtotal. */
-               swtotal = memlimit / 1024 + swtotal;
+               if (!liblxcfs_memory_is_cgroupv2())
+                       swtotal = memlimit / 1024 + swtotal;
+
                 if (hostswtotal < swtotal) {
                         swtotal = hostswtotal;
                 }
@@ -1359,11 +1361,10 @@ static int proc_meminfo_read(char *buf, size_t size, off_t offset,
  
                                 sscanf(line + STRLITERALLEN("SwapTotal:"), "%" PRIu64, &hostswtotal);
  
-                               /* The total amount of swap is always reported to be the
+                               /* In cgroups v1, the total amount of swap is always reported to be the
                                    lesser of the RAM+SWAP limit or the SWAP device size.
                                    This is because the kernel can swap as much as it
                                    wants and not only up to swtotal. */
-
                                 if (!liblxcfs_memory_is_cgroupv2())
                                         swtotal += memlimit;
author	Alex Hudspith <alex@hudspith.io>
	Mon, 6 Nov 2023 09:17:38 +0000 (09:17 +0000)
committer	Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
	Wed, 27 Mar 2024 12:38:32 +0000 (13:38 +0100)
src/bindings.c		patch | blob | blame | history
src/cgroups/cgfsng.c		patch | blob | blame | history
src/cgroups/cgroup.h		patch | blob | blame | history
src/proc_fuse.c		patch | blob | blame | history