From 5af9369bf39931908f87fd6ad1a55e15041fe5c6 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 13 Dec 2017 00:22:47 +0100 Subject: [PATCH] start: fix cgroup namespace preservation Prior to this patch we raced with a very short-lived init process. Essentially, the init process could exit before we had time to record the cgroup namespace causing the container to abort and report ABORTING to the caller when it actually started just fine. Let's not do this. (This uses syscall(SYS_getpid) in the the child to retrieve the pid just in case we're on an older glibc version and we end up in the namespace sharing branch of the actual lxc_clone() call.) Additionally this fixes the shortlived tests. They were faulty so far and should have actually failed because of the cgroup namespace recording race but the ret variable used to return from the function was not correctly initialized. This fixes it. Furthermore, the shortlived tests used the c->error_num variable to determine success or failure but this is actually not correct when the container is started daemonized. Signed-off-by: Christian Brauner --- src/lxc/criu.c | 8 +++-- src/lxc/start.c | 68 ++++++++++++++++++++++++++++++++---------- src/lxc/start.h | 2 +- src/tests/shortlived.c | 54 ++++++++++++--------------------- 4 files changed, 78 insertions(+), 54 deletions(-) diff --git a/src/lxc/criu.c b/src/lxc/criu.c index 245328ab1..ddcde5b73 100644 --- a/src/lxc/criu.c +++ b/src/lxc/criu.c @@ -829,7 +829,7 @@ out_unlock: */ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_opts *opts, char *criu_version) { - int fd; + int fd, ret; pid_t pid; struct lxc_handler *handler; int status = 0; @@ -870,7 +870,11 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ goto out_fini_handler; } - resolve_clone_flags(handler); + ret = resolve_clone_flags(handler); + if (ret < 0) { + ERROR("%s - Unsupported clone flag specified", strerror(errno)); + goto out_fini_handler; + } if (pipe(pipes) < 0) { SYSERROR("pipe() failed"); diff --git a/src/lxc/start.c b/src/lxc/start.c index a8acfea17..f3bccb831 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -798,14 +798,14 @@ void lxc_abort(const char *name, struct lxc_handler *handler) static int do_start(void *data) { - int ret; + int fd, ret; struct lxc_list *iterator; char path[PATH_MAX]; - int devnull_fd = -1; struct lxc_handler *handler = data; bool have_cap_setgid; uid_t new_uid; gid_t new_gid; + int devnull_fd = -1; if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) { SYSERROR("Failed to set signal mask."); @@ -914,7 +914,7 @@ static int do_start(void *data) * * 8:cpuset:/ */ - if (cgns_supported()) { + if (handler->clone_flags & CLONE_NEWCGROUP) { if (unshare(CLONE_NEWCGROUP) < 0) { INFO("Failed to unshare CLONE_NEWCGROUP."); goto out_warn_father; @@ -935,13 +935,31 @@ static int do_start(void *data) /* Setup the container, ip, names, utsname, ... */ ret = lxc_setup(handler); - close(handler->data_sock[0]); close(handler->data_sock[1]); if (ret < 0) { ERROR("Failed to setup container \"%s\".", handler->name); + close(handler->data_sock[0]); goto out_warn_father; } + if (handler->clone_flags & CLONE_NEWCGROUP) { + fd = lxc_preserve_ns(syscall(SYS_getpid), "cgroup"); + if (fd < 0) { + ERROR("%s - Failed to preserve cgroup namespace", strerror(errno)); + close(handler->data_sock[0]); + goto out_warn_father; + } + + ret = lxc_abstract_unix_send_fds(handler->data_sock[0], &fd, 1, NULL, 0); + close(fd); + if (ret < 0) { + ERROR("%s - Failed to preserve cgroup namespace", strerror(errno)); + close(handler->data_sock[0]); + goto out_warn_father; + } + } + close(handler->data_sock[0]); + /* Set the label to change to when we exec(2) the container's init. */ if (lsm_process_label_set(NULL, handler->conf, 1, 1) < 0) goto out_warn_father; @@ -1123,7 +1141,7 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler) return ret; } -void resolve_clone_flags(struct lxc_handler *handler) +int resolve_clone_flags(struct lxc_handler *handler) { handler->clone_flags = CLONE_NEWNS; @@ -1155,6 +1173,17 @@ void resolve_clone_flags(struct lxc_handler *handler) handler->clone_flags |= CLONE_NEWPID; else INFO("Inheriting pid namespace"); + + if (cgns_supported()) { + if (!handler->conf->inherit_ns[LXC_NS_CGROUP]) + handler->clone_flags |= CLONE_NEWCGROUP; + else + INFO("Inheriting cgroup namespace"); + } else if (handler->conf->inherit_ns[LXC_NS_CGROUP]) { + return -EINVAL; + } + + return 0; } /* Note that this function is used with clone(CLONE_VM). Some glibc versions @@ -1236,7 +1265,11 @@ static int lxc_spawn(struct lxc_handler *handler) return -1; } - resolve_clone_flags(handler); + ret = resolve_clone_flags(handler); + if (ret < 0) { + lxc_sync_fini(handler); + return -1; + } if (handler->clone_flags & CLONE_NEWNET) { if (!lxc_list_empty(&conf->network)) { @@ -1295,6 +1328,8 @@ static int lxc_spawn(struct lxc_handler *handler) */ handler->on_clone_flags &= ~CLONE_NEWNET; } + /* The cgroup namespace gets unshare()ed not clone()ed. */ + handler->on_clone_flags &= ~CLONE_NEWCGROUP; if (share_ns) ret = lxc_clone(do_share_ns, handler, CLONE_VFORK | CLONE_VM | CLONE_FILES); @@ -1428,16 +1463,6 @@ static int lxc_spawn(struct lxc_handler *handler) if (lxc_sync_barrier_child(handler, LXC_SYNC_READY_START)) return -1; - if (cgns_supported()) { - ret = lxc_preserve_ns(handler->pid, "cgroup"); - if (ret < 0) { - ERROR("%s - Failed to preserve cgroup namespace", strerror(errno)); - goto out_delete_net; - } - handler->nsfd[LXC_NS_CGROUP] = ret; - DEBUG("Preserved cgroup namespace via fd %d", ret); - } - if (lxc_network_recv_name_and_ifindex_from_child(handler) < 0) { ERROR("Failed to receive names and ifindices for network " "devices from child"); @@ -1457,6 +1482,17 @@ static int lxc_spawn(struct lxc_handler *handler) goto out_delete_net; } + if (handler->clone_flags & CLONE_NEWCGROUP) { + ret = lxc_abstract_unix_recv_fds(handler->data_sock[1], + &handler->nsfd[LXC_NS_CGROUP], + 1, NULL, 0); + if (ret < 0) { + ERROR("%s - Failed to preserve cgroup namespace", strerror(errno)); + goto out_delete_net; + } + DEBUG("Preserved cgroup namespace via fd %d", handler->nsfd[LXC_NS_CGROUP]); + } + if (handler->ops->post_start(handler, handler->data)) goto out_abort; diff --git a/src/lxc/start.h b/src/lxc/start.h index 2161565d4..cf7b47e3f 100644 --- a/src/lxc/start.h +++ b/src/lxc/start.h @@ -136,6 +136,6 @@ extern int lxc_check_inherited(struct lxc_conf *conf, bool closeall, extern int __lxc_start(const char *, struct lxc_handler *, struct lxc_operations *, void *, const char *, bool); -extern void resolve_clone_flags(struct lxc_handler *handler); +extern int resolve_clone_flags(struct lxc_handler *handler); #endif diff --git a/src/tests/shortlived.c b/src/tests/shortlived.c index b5397e7eb..af5bb2eb7 100644 --- a/src/tests/shortlived.c +++ b/src/tests/shortlived.c @@ -95,13 +95,13 @@ again: int main(int argc, char *argv[]) { - int i; + int fd, i; const char *s; bool b; struct lxc_container *c; struct lxc_log log; char template[sizeof(P_tmpdir"/shortlived_XXXXXX")]; - int ret = 0; + int ret = EXIT_FAILURE; strcpy(template, P_tmpdir"/shortlived_XXXXXX"); i = lxc_make_tmpfile(template, false); @@ -122,12 +122,10 @@ int main(int argc, char *argv[]) if (lxc_log_init(&log)) exit(EXIT_FAILURE); - ret = 1; /* test a real container */ c = lxc_container_new(MYNAME, NULL); if (!c) { fprintf(stderr, "%d: error creating lxc_container %s\n", __LINE__, MYNAME); - ret = 1; goto out; } @@ -136,8 +134,7 @@ int main(int argc, char *argv[]) goto out; } - ret = create_container(); - if (ret) { + if (create_container() < 0) { fprintf(stderr, "%d: failed to create a container\n", __LINE__); goto out; } @@ -166,7 +163,7 @@ int main(int argc, char *argv[]) } if (!c->set_config_item(c, "lxc.execute.cmd", "echo hello")) { - fprintf(stderr, "%d: failed setting lxc.init.cmd\n", __LINE__); + fprintf(stderr, "%d: failed setting lxc.execute.cmd\n", __LINE__); goto out; } @@ -179,13 +176,6 @@ int main(int argc, char *argv[]) goto out; } - /* The container needs to exit with a successful error code. */ - if (c->error_num != 0) { - fprintf(stderr, "%d: %s exited successfully on %dth iteration\n", __LINE__, c->name, i); - goto out; - } - fprintf(stderr, "%d: %s exited correctly with error code %d on %dth iteration\n", __LINE__, c->name, c->error_num, i); - if (!c->wait(c, "STOPPED", 30)) { fprintf(stderr, "%d: %s failed to wait on %dth iteration\n", __LINE__, c->name, i); goto out; @@ -199,13 +189,6 @@ int main(int argc, char *argv[]) goto out; } - /* The container needs to exit with a successful error code. */ - if (c->error_num != 0) { - fprintf(stderr, "%d: %s exited successfully on %dth iteration\n", __LINE__, c->name, i); - goto out; - } - fprintf(stderr, "%d: %s exited correctly with error code %d on %dth iteration\n", __LINE__, c->name, c->error_num, i); - if (!c->wait(c, "STOPPED", 30)) { fprintf(stderr, "%d: %s failed to wait on %dth iteration\n", __LINE__, c->name, i); goto out; @@ -229,13 +212,6 @@ int main(int argc, char *argv[]) goto out; } - /* The container needs to exit with an error code.*/ - if (c->error_num == 0) { - fprintf(stderr, "%d: %s exited successfully on %dth iteration\n", __LINE__, c->name, i); - goto out; - } - fprintf(stderr, "%d: %s exited correctly with error code %d on %dth iteration\n", __LINE__, c->name, c->error_num, i); - if (!c->wait(c, "STOPPED", 30)) { fprintf(stderr, "%d: %s failed to wait on %dth iteration\n", __LINE__, c->name, i); goto out; @@ -252,13 +228,6 @@ int main(int argc, char *argv[]) goto out; } - /* The container needs to exit with an error code.*/ - if (c->error_num == 0) { - fprintf(stderr, "%d: %s exited successfully on %dth iteration\n", __LINE__, c->name, i); - goto out; - } - fprintf(stderr, "%d: %s exited correctly with error code %d on %dth iteration\n", __LINE__, c->name, c->error_num, i); - if (!c->wait(c, "STOPPED", 30)) { fprintf(stderr, "%d: %s failed to wait on %dth iteration\n", __LINE__, c->name, i); goto out; @@ -276,6 +245,21 @@ out: destroy_container(); } lxc_container_put(c); + + if (ret != 0) { + fd = open(template, O_RDONLY); + if (fd >= 0) { + char buf[4096]; + ssize_t buflen; + while ((buflen = read(fd, buf, 1024)) > 0) { + buflen = write(STDERR_FILENO, buf, buflen); + if (buflen <= 0) + break; + } + close(fd); + } + } + unlink(template); exit(ret); } -- 2.39.5