]> git.proxmox.com Git - mirror_lxc.git/commitdiff
cgroups: handle funky cgroup layouts
authorChristian Brauner <christian.brauner@ubuntu.com>
Thu, 1 Jul 2021 07:51:30 +0000 (09:51 +0200)
committerChristian Brauner <christian.brauner@ubuntu.com>
Thu, 1 Jul 2021 07:57:09 +0000 (09:57 +0200)
Old versions of Docker emulate a cgroup namespace by bind-mounting the
container's cgroup over the corresponding controller:

/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime master:11 - cgroup cgroup rw,xattr,name=systemd
/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,net_cls,net_prio
/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime master:16 - cgroup cgroup rw,cpu,cpuacct
/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime master:17 - cgroup cgroup rw,memory
/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime master:18 - cgroup cgroup rw,devices
/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime master:19 - cgroup cgroup rw,hugetlb
/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime master:20 - cgroup cgroup rw,perf_event
/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime master:21 - cgroup cgroup rw,cpuset
/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime master:22 - cgroup cgroup rw,blkio
/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime master:23 - cgroup cgroup rw,pids
/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod7d4424e6_bb13_42f4_a47a_45a4828bf54d.slice/docker-d0b3604b67ac7930dd34ba3a796627e3e4717d12309e90a4afe3f38b6816ac98.scope /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime master:24 - cgroup cgroup rw,freezer

New versions of LXC always stash a file descriptor for the root of the
cgroup mount at /sys/fs/cgroup and then resolve the current cgroup
parsed from /proc/{1,self}/cgroup relative to that file descriptor. This
doesn't work when the caller's cgroup is mouned over the controllers.
Older versions of LXC simply counted such layouts as having no cgroups
available for delegation at all and moved on provided no cgroup limits
were requested. But mainline LXC would fail such layouts. While I would
argue that failing such layouts is the semantically clean approach we
shouldn't regress users so make mainline LXC treat such cgroup layouts
as having no cgroups available for delegation.

Fixes: #3890
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
src/lxc/cgroups/cgfsng.c

index bb21696e038c068459a7ad1f322c2c5ed7962c6b..2fa4952ae201aa2889cade491fe884e724d0d36d 100644 (file)
@@ -3194,8 +3194,15 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                                dfd_base = open_at(dfd_mnt, current_cgroup,
                                                   PROTECT_OPATH_DIRECTORY,
                                                   PROTECT_LOOKUP_BENEATH_XDEV, 0);
-                               if (dfd_base < 0)
-                                       return syserror("Failed to open %d/%s", dfd_mnt, current_cgroup);
+                               if (dfd_base < 0) {
+                                       if (errno != ENOENT)
+                                               return syserror("Failed to open %d/%s",
+                                                               dfd_mnt, current_cgroup);
+
+                                       SYSTRACE("Current cgroup %d/%s does not exist (funky cgroup layout?)",
+                                                dfd_mnt, current_cgroup);
+                                       continue;
+                               }
                                dfd = dfd_base;
                        }
 
@@ -3265,9 +3272,15 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                                dfd_base = open_at(dfd_mnt, current_cgroup,
                                                   PROTECT_OPATH_DIRECTORY,
                                                   PROTECT_LOOKUP_BENEATH_XDEV, 0);
-                               if (dfd_base < 0)
-                                       return syserror("Failed to open %d/%s",
-                                                       dfd_mnt, current_cgroup);
+                               if (dfd_base < 0) {
+                                       if (errno != ENOENT)
+                                               return syserror("Failed to open %d/%s",
+                                                               dfd_mnt, current_cgroup);
+
+                                       SYSTRACE("Current cgroup %d/%s does not exist (funky cgroup layout?)",
+                                                dfd_mnt, current_cgroup);
+                                       continue;
+                               }
                                dfd = dfd_base;
                        }