]> git.proxmox.com Git - mirror_lxc.git/commitdiff
cgroups: handle older kernels (e.g. v4.9)
author Christian Brauner <christian.brauner@ubuntu.com>
Mon, 30 Mar 2020 15:40:16 +0000 (17:40 +0200)
committer Christian Brauner <christian.brauner@ubuntu.com>
Mon, 30 Mar 2020 16:17:41 +0000 (18:17 +0200)
On older kernels the restrictions on moving processes between cgroups differ
from those on newer kernels. Specifically, we're running into the following
check:

if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
    !uid_eq(cred->euid, tcred->uid) &&
    !uid_eq(cred->euid, tcred->suid))
        ret = -EACCES;

which dictates that in order to move a process into a cgroup one either needs
to be global root (no restrictions apply) or the effective uid of the process
performing the move must be identical to the uid or saved uid of the process
that is supposed to be moved. Our new attaching logic didn't fulfill this
criterion because the check is not present on new kernels.

Closes https://github.com/lxc/lxd/issues/7104.
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
src/lxc/cgroups/cgfsng.c
src/lxc/conf.c
src/lxc/conf.h

index 5c37c842e0feaf18dfb24af14dfe48b483d67c6c..dd5bccc7d7dc37bcd4b6b8d28c29336d6025eb37 100644 (file)
@@ -30,6 +30,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#include "af_unix.h"
 #include "caps.h"
 #include "cgroup.h"
 #include "cgroup2_devices.h"
@@ -2123,20 +2124,80 @@ static int cgroup_attach_leaf(const struct lxc_conf *conf, int unified_fd, pid_t
        return log_error_errno(-1, errno, "Failed to attach to unified cgroup");
 }
 
+static int cgroup_attach_create_leaf(const struct lxc_conf *conf,
+                                    int unified_fd, int *sk_fd)
+{      /*
+        * Create the ".lxc" leaf cgroup below unified_fd, open its
+        * "cgroup.procs" file for writing and send that file descriptor over
+        * the socket *sk_fd. Called from cgroup_unified_attach_child_wrapper(),
+        * i.e. on the fn_child side of userns_exec_minimal(); the matching
+        * receiver is cgroup_attach_move_into_leaf().
+        *
+        * conf is not used in this function.
+        *
+        * The value returned is whatever log_error_errno()/log_debug() yield;
+        * their first argument looks like the returned value (-1 when the
+        * leaf cannot be created, -errno when opening or sending fails, 0 on
+        * success) - confirm against the macro definitions.
+        *
+        * NOTE(review): __do_close presumably closes sk and target_fd when
+        * they go out of scope; *sk_fd is not reset here, so the caller's copy
+        * keeps the old number - confirm.
+        */
+       __do_close int sk = *sk_fd, target_fd = -EBADF;
+       ssize_t ret;
+
+       /* Create leaf cgroup; an already existing ".lxc" (EEXIST) is not an error. */
+       ret = mkdirat(unified_fd, ".lxc", 0755);
+       if (ret < 0 && errno != EEXIST)
+               return log_error_errno(-1, errno, "Failed to create leaf cgroup \".lxc\"");
+
+       target_fd = openat(unified_fd, ".lxc/cgroup.procs", O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
+       if (target_fd < 0) /* note: this path returns -errno while the mkdirat() path returns -1 */
+               return log_error_errno(-errno, errno, "Failed to open \".lxc/cgroup.procs\"");
+
+       ret = lxc_abstract_unix_send_fds(sk, &target_fd, 1, NULL, 0); /* hand the open fd to the other end of the socket */
+       if (ret <= 0) /* NOTE(review): errno may be stale (even 0) when ret == 0 - confirm */
+               return log_error_errno(-errno, errno, "Failed to send \".lxc/cgroup.procs\" fd %d", target_fd);
+
+       return log_debug(0, "Sent target cgroup fd %d", target_fd);
+}
+
+static int cgroup_attach_move_into_leaf(const struct lxc_conf *conf,
+                                       int *sk_fd, pid_t pid)
+{      /*
+        * Receive one file descriptor from the socket *sk_fd and write pid,
+        * formatted with INT64_FMT, to it. Called from
+        * cgroup_unified_attach_parent_wrapper(), i.e. on the fn_parent side of
+        * userns_exec_minimal(); the descriptor is expected to be the
+        * ".lxc/cgroup.procs" file sent by cgroup_attach_create_leaf().
+        *
+        * conf is not used in this function.
+        *
+        * The value returned is whatever log_error_errno()/log_debug() yield;
+        * their first argument looks like the returned value (-1 on failure,
+        * 0 on success) - confirm against the macro definitions.
+        *
+        * NOTE(review): __do_close presumably closes sk and target_fd when
+        * they go out of scope - confirm.
+        */
+       __do_close int sk = *sk_fd, target_fd = -EBADF;
+       char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1]; /* presumably sized for any formatted int64_t plus NUL - confirm */
+       size_t pidstr_len;
+       ssize_t ret;
+
+       ret = lxc_abstract_unix_recv_fds(sk, &target_fd, 1, NULL, 0);
+       if (ret <= 0)
+               return log_error_errno(-1, errno, "Failed to receive target cgroup fd");
+
+       pidstr_len = sprintf(pidstr, INT64_FMT, (int64_t)pid);
+
+       ret = lxc_write_nointr(target_fd, pidstr, pidstr_len);
+       /*
+        * An incomplete write is only reported when errno is not EBUSY; with
+        * EBUSY the function falls through to the success path below.
+        * NOTE(review): ret is ssize_t and pidstr_len is size_t (mixed-sign
+        * comparison), and errno is not guaranteed to be meaningful after a
+        * short but successful write - confirm.
+        */
+       if (ret != pidstr_len && errno != EBUSY)
+               return log_error_errno(-1, errno, "Failed to move process into target cgroup");
+
+       return log_debug(0, "Moved process into target cgroup");
+}
+
 struct userns_exec_unified_attach_data { /* arguments shared by the child and parent attach wrappers */
        const struct lxc_conf *conf; /* must be non-NULL; forwarded to the attach helpers */
        int unified_fd; /* directory fd under which the ".lxc" leaf cgroup is created */
+       int sk_pair[2]; /* socket pair: [0] is used by the parent wrapper, [1] by the child wrapper */
        pid_t pid; /* process to be written into the leaf's cgroup.procs; must be > 0 */
 };
 
-static int cgroup_unified_attach_wrapper(void *data)
+static int cgroup_unified_attach_child_wrapper(void *data)
+{      /*
+        * fn_child callback for userns_exec_minimal(). data must point to a
+        * struct userns_exec_unified_attach_data. After validating the
+        * arguments it closes sk_pair[0], the end it does not use, and lets
+        * cgroup_attach_create_leaf() create the leaf cgroup and send the
+        * opened fd over sk_pair[1].
+        *
+        * Returns ret_errno(EINVAL) for invalid arguments, otherwise the
+        * result of cgroup_attach_create_leaf().
+        */
+       struct userns_exec_unified_attach_data *args = data;
+
+       if (!args->conf || args->unified_fd < 0 || args->pid <= 0 ||
+           args->sk_pair[0] < 0 || args->sk_pair[1] < 0)
+               return ret_errno(EINVAL);
+
+       close_prot_errno_disarm(args->sk_pair[0]); /* this side only talks over sk_pair[1] */
+       return cgroup_attach_create_leaf(args->conf, args->unified_fd,
+                                        &args->sk_pair[1]);
+}
+
+static int cgroup_unified_attach_parent_wrapper(void *data)
 {      /*
+        * fn_parent callback for userns_exec_minimal(). data must point to a
+        * struct userns_exec_unified_attach_data. After validating the
+        * arguments it closes sk_pair[1], the end it does not use, and lets
+        * cgroup_attach_move_into_leaf() receive the cgroup fd over sk_pair[0]
+        * and write args->pid to it.
+        *
+        * Returns ret_errno(EINVAL) for invalid arguments, otherwise the
+        * result of cgroup_attach_move_into_leaf().
+        */
        struct userns_exec_unified_attach_data *args = data;
 
-       if (!args->conf || args->unified_fd < 0 || args->pid <= 0)
+       if (!args->conf || args->unified_fd < 0 || args->pid <= 0 ||
+           args->sk_pair[0] < 0 || args->sk_pair[1] < 0)
                return ret_errno(EINVAL);
 
-       return cgroup_attach_leaf(args->conf, args->unified_fd, args->pid);
+       close_prot_errno_disarm(args->sk_pair[1]); /* this side only talks over sk_pair[0] */
+       return cgroup_attach_move_into_leaf(args->conf, &args->sk_pair[0],
+                                           args->pid);
 }
 
 int cgroup_attach(const struct lxc_conf *conf, const char *name,
@@ -2159,7 +2220,15 @@ int cgroup_attach(const struct lxc_conf *conf, const char *name,
                        .pid            = pid,
                };
 
-               ret = userns_exec_minimal(conf, cgroup_unified_attach_wrapper, &args);
+               ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair);
+               if (ret < 0)
+                       return -errno;
+
+               ret = userns_exec_minimal(conf,
+                                         cgroup_unified_attach_parent_wrapper,
+                                         &args,
+                                         cgroup_unified_attach_child_wrapper,
+                                         &args);
        } else {
                ret = cgroup_attach_leaf(conf, unified_fd, pid);
        }
@@ -2213,7 +2282,15 @@ static int __cg_unified_attach(const struct hierarchy *h,
                        .pid            = pid,
                };
 
-               ret = userns_exec_minimal(conf, cgroup_unified_attach_wrapper, &args);
+               ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair);
+               if (ret < 0)
+                       return -errno;
+
+               ret = userns_exec_minimal(conf,
+                                         cgroup_unified_attach_parent_wrapper,
+                                         &args,
+                                         cgroup_unified_attach_child_wrapper,
+                                         &args);
        } else {
                ret = cgroup_attach_leaf(conf, unified_fd, pid);
        }
index 9fc5c68a7a7ff1ff4db7f7c210cc9f0f2edb7a3e..2f6be9f26363f571f4f5cb68fdaed3fa284862a2 100644 (file)
@@ -4126,7 +4126,9 @@ on_error:
        return ret;
 }
 
-int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *data)
+int userns_exec_minimal(const struct lxc_conf *conf,
+                       int (*fn_parent)(void *), void *fn_parent_data,
+                       int (*fn_child)(void *), void *fn_child_data)
 {
        call_cleaner(lxc_free_idmap) struct lxc_list *idmap = NULL;
        uid_t resuid = LXC_INVALID_UID;
@@ -4136,7 +4138,7 @@ int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *da
        pid_t pid;
        int sock_fds[2];
 
-       if (!conf || !fn || !data)
+       if (!conf || !fn_child)
                return ret_errno(EINVAL);
 
        idmap = get_minimal_idmap(conf, &resuid, &resgid);
@@ -4189,7 +4191,7 @@ int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *da
                        _exit(EXIT_FAILURE);
                }
 
-               ret = fn(data);
+               ret = fn_child(fn_child_data);
                if (ret) {
                        SYSERROR("Running function in new user namespace failed");
                        _exit(EXIT_FAILURE);
@@ -4232,6 +4234,11 @@ int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *da
                goto on_error;
        }
 
+       if (fn_parent && fn_parent(fn_parent_data)) {
+               SYSERROR("Running parent function failed");
+               _exit(EXIT_FAILURE);
+       }
+
 on_error:
        close_prot_errno_disarm(sock_fds[0]);
        close_prot_errno_disarm(sock_fds[1]);
index 2bd2a203a6e13ebbac9f44ea1623ecdde29dba35..64885c35ea68d544f33265dee85abd28ca6d0b60 100644 (file)
@@ -467,6 +467,8 @@ extern int setup_proc_filesystem(struct lxc_list *procs, pid_t pid);
 extern int lxc_clear_procs(struct lxc_conf *c, const char *key);
 extern int lxc_clear_apparmor_raw(struct lxc_conf *c);
 extern int lxc_clear_namespace(struct lxc_conf *c);
-extern int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *data);
+extern int userns_exec_minimal(const struct lxc_conf *conf,
+                              int (*fn_parent)(void *), void *fn_parent_data,
+                              int (*fn_child)(void *), void *fn_child_data);
 
 #endif /* __LXC_CONF_H */